• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::arch::x86_64::CpuidResult;
6 use std::mem::size_of;
7 
8 use base::errno_result;
9 use base::error;
10 use base::ioctl;
11 use base::ioctl_with_mut_ptr;
12 use base::ioctl_with_mut_ref;
13 use base::ioctl_with_ptr;
14 use base::ioctl_with_ref;
15 use base::ioctl_with_val;
16 use base::AsRawDescriptor;
17 use base::Error;
18 use base::IoctlNr;
19 use base::MappedRegion;
20 use base::Result;
21 use data_model::vec_with_array_field;
22 use kvm_sys::*;
23 use libc::E2BIG;
24 use libc::EIO;
25 use libc::ENXIO;
26 use vm_memory::GuestAddress;
27 
28 use super::Config;
29 use super::Kvm;
30 use super::KvmVcpu;
31 use super::KvmVm;
32 use crate::get_tsc_offset_from_msr;
33 use crate::host_phys_addr_bits;
34 use crate::set_tsc_offset_via_msr;
35 use crate::ClockState;
36 use crate::CpuId;
37 use crate::CpuIdEntry;
38 use crate::DebugRegs;
39 use crate::DescriptorTable;
40 use crate::DeviceKind;
41 use crate::Fpu;
42 use crate::HypervisorX86_64;
43 use crate::IoapicRedirectionTableEntry;
44 use crate::IoapicState;
45 use crate::IrqSourceChip;
46 use crate::LapicState;
47 use crate::PicSelect;
48 use crate::PicState;
49 use crate::PitChannelState;
50 use crate::PitState;
51 use crate::ProtectionType;
52 use crate::Register;
53 use crate::Regs;
54 use crate::Segment;
55 use crate::Sregs;
56 use crate::VcpuEvents;
57 use crate::VcpuExceptionState;
58 use crate::VcpuExit;
59 use crate::VcpuInterruptState;
60 use crate::VcpuNmiState;
61 use crate::VcpuSmiState;
62 use crate::VcpuTripleFaultState;
63 use crate::VcpuX86_64;
64 use crate::VmCap;
65 use crate::VmX86_64;
66 use crate::Xsave;
67 use crate::MAX_IOAPIC_PINS;
68 use crate::NUM_IOAPIC_PINS;
69 
// Alias for the kvm crate's flexible-array-member wrapper around `struct kvm_cpuid2`.
type KvmCpuId = kvm::CpuId;
// Size in bytes of the legacy fixed-size `struct kvm_xsave`; used to choose between
// KVM_GET_XSAVE and the variable-size KVM_GET_XSAVE2.
const KVM_XSAVE_MAX_SIZE: i32 = 4096;
72 
get_cpuid_with_initial_capacity<T: AsRawDescriptor>( descriptor: &T, kind: IoctlNr, initial_capacity: usize, ) -> Result<CpuId>73 pub fn get_cpuid_with_initial_capacity<T: AsRawDescriptor>(
74     descriptor: &T,
75     kind: IoctlNr,
76     initial_capacity: usize,
77 ) -> Result<CpuId> {
78     let mut entries: usize = initial_capacity;
79 
80     loop {
81         let mut kvm_cpuid = KvmCpuId::new(entries);
82 
83         let ret = unsafe {
84             // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the
85             // memory allocated for the struct. The limit is read from nent within KvmCpuId,
86             // which is set to the allocated size above.
87             ioctl_with_mut_ptr(descriptor, kind, kvm_cpuid.as_mut_ptr())
88         };
89         if ret < 0 {
90             let err = Error::last();
91             match err.errno() {
92                 E2BIG => {
93                     // double the available memory for cpuid entries for kvm.
94                     if let Some(val) = entries.checked_mul(2) {
95                         entries = val;
96                     } else {
97                         return Err(err);
98                     }
99                 }
100                 _ => return Err(err),
101             }
102         } else {
103             return Ok(CpuId::from(&kvm_cpuid));
104         }
105     }
106 }
107 
108 impl Kvm {
get_cpuid(&self, kind: IoctlNr) -> Result<CpuId>109     pub fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
110         const KVM_MAX_ENTRIES: usize = 256;
111         get_cpuid_with_initial_capacity(self, kind, KVM_MAX_ENTRIES)
112     }
113 
114     // The x86 machine type is always 0. Protected VMs are not supported.
get_vm_type(&self, protection_type: ProtectionType) -> Result<u32>115     pub fn get_vm_type(&self, protection_type: ProtectionType) -> Result<u32> {
116         if protection_type == ProtectionType::Unprotected {
117             Ok(0)
118         } else {
119             error!("Protected mode is not supported on x86_64.");
120             Err(Error::new(libc::EINVAL))
121         }
122     }
123 
124     /// Get the size of guest physical addresses in bits.
get_guest_phys_addr_bits(&self) -> u8125     pub fn get_guest_phys_addr_bits(&self) -> u8 {
126         // Assume the guest physical address size is the same as the host.
127         host_phys_addr_bits()
128     }
129 }
130 
131 impl HypervisorX86_64 for Kvm {
get_supported_cpuid(&self) -> Result<CpuId>132     fn get_supported_cpuid(&self) -> Result<CpuId> {
133         self.get_cpuid(KVM_GET_SUPPORTED_CPUID())
134     }
135 
get_emulated_cpuid(&self) -> Result<CpuId>136     fn get_emulated_cpuid(&self) -> Result<CpuId> {
137         self.get_cpuid(KVM_GET_EMULATED_CPUID())
138     }
139 
get_msr_index_list(&self) -> Result<Vec<u32>>140     fn get_msr_index_list(&self) -> Result<Vec<u32>> {
141         const MAX_KVM_MSR_ENTRIES: usize = 256;
142 
143         let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
144         msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;
145 
146         let ret = unsafe {
147             // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
148             // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
149             // size (MAX_KVM_MSR_ENTRIES) above.
150             ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST(), &mut msr_list[0])
151         };
152         if ret < 0 {
153             return errno_result();
154         }
155 
156         let mut nmsrs = msr_list[0].nmsrs;
157 
158         // Mapping the unsized array to a slice is unsafe because the length isn't known.  Using
159         // the length we originally allocated with eliminates the possibility of overflow.
160         let indices: &[u32] = unsafe {
161             if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
162                 nmsrs = MAX_KVM_MSR_ENTRIES as u32;
163             }
164             msr_list[0].indices.as_slice(nmsrs as usize)
165         };
166 
167         Ok(indices.to_vec())
168     }
169 }
170 
impl KvmVm {
    /// Does platform specific initialization for the KvmVm.
    ///
    /// No x86_64-specific setup is currently needed, so this is a no-op.
    pub fn init_arch(&self, _cfg: &Config) -> Result<()> {
        Ok(())
    }

    /// Checks if a particular `VmCap` is available, or returns None if arch-independent
    /// Vm.check_capability() should handle the check.
    pub fn check_capability_arch(&self, c: VmCap) -> Option<bool> {
        match c {
            // KVM on x86_64 always provides the paravirtualized clock.
            VmCap::PvClock => Some(true),
            _ => None,
        }
    }

    /// Returns the params to pass to KVM_CREATE_DEVICE for a `kind` device on this arch, or None to
    /// let the arch-independent `KvmVm::create_device` handle it.
    pub fn get_device_params_arch(&self, _kind: DeviceKind) -> Option<kvm_create_device> {
        // No x86_64 device kind needs special creation parameters.
        None
    }

    /// Arch-specific implementation of `Vm::get_pvclock`.
    ///
    /// Reads the VM's kvmclock via KVM_GET_CLOCK.
    pub fn get_pvclock_arch(&self) -> Result<ClockState> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write correct
        // amount of memory to our pointer, and we verify the return result.
        let mut clock_data: kvm_clock_data = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) };
        if ret == 0 {
            Ok(ClockState::from(clock_data))
        } else {
            errno_result()
        }
    }

    /// Arch-specific implementation of `Vm::set_pvclock`.
    ///
    /// Writes the VM's kvmclock via KVM_SET_CLOCK.
    pub fn set_pvclock_arch(&self, state: &ClockState) -> Result<()> {
        let clock_data = kvm_clock_data::from(*state);
        // Safe because we know that our file is a VM fd, we know the kernel will only read correct
        // amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), &clock_data) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of given interrupt controller by issuing KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn get_pic_state(&self, id: PicSelect) -> Result<kvm_pic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not IOAPIC, so reading the `pic`
                // field of the union is valid for this chip_id.
                irqchip_state.chip.pic
            })
        } else {
            errno_result()
        }
    }

    /// Sets the state of given interrupt controller by issuing KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn set_pic_state(&self, id: PicSelect, state: &kvm_pic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        // Writing the `pic` union member matches the chip_id set above.
        irqchip_state.chip.pic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the KVM_IOAPIC_NUM_PINS value for emulated IO-APIC.
    pub fn get_ioapic_num_pins(&self) -> Result<usize> {
        // Safe because we know that our file is a KVM fd, and if the cap is invalid KVM assumes
        // it's an unavailable extension and returns 0, producing default KVM_IOAPIC_NUM_PINS value.
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), KVM_CAP_IOAPIC_NUM_PINS as u64) }
        {
            ret if ret < 0 => errno_result(),
            // Clamp the kernel-reported pin count into [NUM_IOAPIC_PINS, MAX_IOAPIC_PINS];
            // an unsupported cap (0) thus falls back to NUM_IOAPIC_PINS.
            ret => Ok((ret as usize).max(NUM_IOAPIC_PINS).min(MAX_IOAPIC_PINS)),
        }
    }

    /// Retrieves the state of IOAPIC by issuing KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        // chip_id 2 (KVM_IRQCHIP_IOAPIC) selects the IOAPIC rather than either PIC.
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not PIC, so reading the `ioapic` union member is valid.
                irqchip_state.chip.ioapic
            })
        } else {
            errno_result()
        }
    }

    /// Sets the state of IOAPIC by issuing KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        // chip_id 2 (KVM_IRQCHIP_IOAPIC) selects the IOAPIC rather than either PIC.
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        irqchip_state.chip.ioapic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn create_pit(&self) -> Result<()> {
        let pit_config = kvm_pit_config::default();
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of PIT by issuing KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write
        // correct amount of memory to our pointer, and we verify the return result.
        let mut pit_state = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }

    /// Sets the state of PIT by issuing KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Enable userspace msr.
    ///
    /// Enables KVM_CAP_X86_USER_SPACE_MSR so that unknown, invalid, and filtered MSR
    /// accesses exit to userspace instead of being handled (or injecting #GP) in-kernel.
    pub fn enable_userspace_msr(&self) -> Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_X86_USER_SPACE_MSR,
            ..Default::default()
        };
        // args[0] selects which exit reasons are forwarded to userspace.
        cap.args[0] = (KVM_MSR_EXIT_REASON_UNKNOWN
            | KVM_MSR_EXIT_REASON_INVAL
            | KVM_MSR_EXIT_REASON_FILTER) as u64;

        // Safe because we know that our file is a VM fd, we know that the
        // kernel will only read correct amount of memory from our pointer, and
        // we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), &cap) };
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }

    /// Set MSR_PLATFORM_INFO read access.
    ///
    /// When `allow_read` is false, guest reads of MSR_PLATFORM_INFO fault instead of
    /// returning data (KVM_CAP_MSR_PLATFORM_INFO).
    pub fn set_platform_info_read_access(&self, allow_read: bool) -> Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_MSR_PLATFORM_INFO,
            ..Default::default()
        };
        cap.args[0] = allow_read as u64;

        // Safe because we know that our file is a VM fd, we know that the
        // kernel will only read correct amount of memory from our pointer, and
        // we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), &cap) };
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }

    /// Set msr filter.
    ///
    /// `msr_list` is `(read_msrs, write_msrs)`: the MSR indices whose reads (resp. writes)
    /// should be filtered rather than handled by KVM.
    pub fn set_msr_filter(&self, msr_list: (Vec<u32>, Vec<u32>)) -> Result<()> {
        let mut rd_nmsrs: u32 = 0;
        let mut wr_nmsrs: u32 = 0;
        // Bitmaps start all-ones and bits are cleared below for filtered MSRs.
        // NOTE(review): per the KVM API docs, a 0 bit in a range bitmap denies the access
        // (exiting to userspace when KVM_MSR_EXIT_REASON_FILTER is enabled) — confirm
        // against the target kernel version.
        let mut rd_msr_bitmap: [u8; KVM_MSR_FILTER_RANGE_MAX_BYTES] =
            [0xff; KVM_MSR_FILTER_RANGE_MAX_BYTES];
        let mut wr_msr_bitmap: [u8; KVM_MSR_FILTER_RANGE_MAX_BYTES] =
            [0xff; KVM_MSR_FILTER_RANGE_MAX_BYTES];
        let (rd_msrs, wr_msrs) = msr_list;

        for index in rd_msrs {
            // currently we only consider the MSR lower than
            // KVM_MSR_FILTER_RANGE_MAX_BITS; higher indices are silently ignored.
            if index >= (KVM_MSR_FILTER_RANGE_MAX_BITS as u32) {
                continue;
            }
            rd_nmsrs += 1;
            // Clear bit `index` of the read bitmap.
            rd_msr_bitmap[(index / 8) as usize] &= !(1 << (index & 0x7));
        }
        for index in wr_msrs {
            // currently we only consider the MSR lower than
            // KVM_MSR_FILTER_RANGE_MAX_BITS; higher indices are silently ignored.
            if index >= (KVM_MSR_FILTER_RANGE_MAX_BITS as u32) {
                continue;
            }
            wr_nmsrs += 1;
            // Clear bit `index` of the write bitmap.
            wr_msr_bitmap[(index / 8) as usize] &= !(1 << (index & 0x7));
        }

        // Default-allow: MSRs not covered by a range below remain handled by KVM.
        let mut msr_filter = kvm_msr_filter {
            flags: KVM_MSR_FILTER_DEFAULT_ALLOW,
            ..Default::default()
        };

        // Populate up to two ranges (one read, one write), each covering MSRs
        // [0, KVM_MSR_FILTER_RANGE_MAX_BITS). The bitmap pointers reference the local
        // arrays above, which stay alive across the ioctl below.
        let mut count = 0;
        if rd_nmsrs > 0 {
            msr_filter.ranges[count].flags = KVM_MSR_FILTER_READ;
            msr_filter.ranges[count].nmsrs = KVM_MSR_FILTER_RANGE_MAX_BITS as u32;
            msr_filter.ranges[count].base = 0x0;
            msr_filter.ranges[count].bitmap = rd_msr_bitmap.as_mut_ptr();
            count += 1;
        }
        if wr_nmsrs > 0 {
            msr_filter.ranges[count].flags = KVM_MSR_FILTER_WRITE;
            msr_filter.ranges[count].nmsrs = KVM_MSR_FILTER_RANGE_MAX_BITS as u32;
            msr_filter.ranges[count].base = 0x0;
            msr_filter.ranges[count].bitmap = wr_msr_bitmap.as_mut_ptr();
            count += 1;
        }

        let mut ret = 0;
        if count > 0 {
            // Safe because we know that our file is a VM fd, we know that the
            // kernel will only read correct amount of memory from our pointer, and
            // we verify the return result.
            ret = unsafe { ioctl_with_ref(self, KVM_X86_SET_MSR_FILTER(), &msr_filter) };
        }

        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }

    /// Enable support for split-irqchip.
    ///
    /// With KVM_CAP_SPLIT_IRQCHIP, the PIC/IOAPIC are emulated in userspace while the
    /// local APICs remain in-kernel; `ioapic_pins` sets the number of routable pins.
    pub fn enable_split_irqchip(&self, ioapic_pins: usize) -> Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = ioapic_pins as u64;
        // safe because we allocated the struct and we know the kernel will read
        // exactly the size of the struct
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), &cap) };
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
}
481 
482 impl VmX86_64 for KvmVm {
get_hypervisor(&self) -> &dyn HypervisorX86_64483     fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
484         &self.kvm
485     }
486 
create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>487     fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
488         // create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
489         // or VcpuX86.  But both use the same implementation in KvmVm::create_vcpu.
490         Ok(Box::new(KvmVm::create_kvm_vcpu(self, id)?))
491     }
492 
493     /// Sets the address of the three-page region in the VM's address space.
494     ///
495     /// See the documentation on the KVM_SET_TSS_ADDR ioctl.
set_tss_addr(&self, addr: GuestAddress) -> Result<()>496     fn set_tss_addr(&self, addr: GuestAddress) -> Result<()> {
497         // Safe because we know that our file is a VM fd and we verify the return result.
498         let ret = unsafe { ioctl_with_val(self, KVM_SET_TSS_ADDR(), addr.offset() as u64) };
499         if ret == 0 {
500             Ok(())
501         } else {
502             errno_result()
503         }
504     }
505 
506     /// Sets the address of a one-page region in the VM's address space.
507     ///
508     /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>509     fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
510         // Safe because we know that our file is a VM fd and we verify the return result.
511         let ret =
512             unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &(addr.offset() as u64)) };
513         if ret == 0 {
514             Ok(())
515         } else {
516             errno_result()
517         }
518     }
519 }
520 
521 impl KvmVcpu {
522     /// Arch-specific implementation of `Vcpu::pvclock_ctrl`.
pvclock_ctrl_arch(&self) -> Result<()>523     pub fn pvclock_ctrl_arch(&self) -> Result<()> {
524         let ret = unsafe {
525             // The ioctl is safe because it does not read or write memory in this process.
526             ioctl(self, KVM_KVMCLOCK_CTRL())
527         };
528         if ret == 0 {
529             Ok(())
530         } else {
531             errno_result()
532         }
533     }
534 
535     /// Handles a `KVM_EXIT_SYSTEM_EVENT` with event type `KVM_SYSTEM_EVENT_RESET` with the given
536     /// event flags and returns the appropriate `VcpuExit` value for the run loop to handle.
system_event_reset(&self, _event_flags: u64) -> Result<VcpuExit>537     pub fn system_event_reset(&self, _event_flags: u64) -> Result<VcpuExit> {
538         Ok(VcpuExit::SystemEventReset)
539     }
540 }
541 
542 impl VcpuX86_64 for KvmVcpu {
    #[allow(clippy::cast_ptr_alignment)]
    fn set_interrupt_window_requested(&self, requested: bool) {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Per the KVM API, setting this flag asks KVM to exit to userspace once the guest
        // can accept an interrupt injection.
        run.request_interrupt_window = requested.into();
    }
551 
    #[allow(clippy::cast_ptr_alignment)]
    fn ready_for_interrupt(&self) -> bool {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Injection is possible only when the kernel reports readiness AND the guest has
        // interrupts enabled (if_flag mirrors RFLAGS.IF per the kvm_run layout).
        run.ready_for_interrupt_injection != 0 && run.if_flag != 0
    }
560 
561     /// Use the KVM_INTERRUPT ioctl to inject the specified interrupt vector.
562     ///
563     /// While this ioctl exists on PPC and MIPS as well as x86, the semantics are different and
564     /// ChromeOS doesn't support PPC or MIPS.
interrupt(&self, irq: u32) -> Result<()>565     fn interrupt(&self, irq: u32) -> Result<()> {
566         let interrupt = kvm_interrupt { irq };
567         // safe becuase we allocated the struct and we know the kernel will read
568         // exactly the size of the struct
569         let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) };
570         if ret == 0 {
571             Ok(())
572         } else {
573             errno_result()
574         }
575     }
576 
inject_nmi(&self) -> Result<()>577     fn inject_nmi(&self) -> Result<()> {
578         // Safe because we know that our file is a VCPU fd.
579         let ret = unsafe { ioctl(self, KVM_NMI()) };
580         if ret == 0 {
581             Ok(())
582         } else {
583             errno_result()
584         }
585     }
586 
get_regs(&self) -> Result<Regs>587     fn get_regs(&self) -> Result<Regs> {
588         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
589         // correct amount of memory from our pointer, and we verify the return result.
590         let mut regs: kvm_regs = Default::default();
591         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
592         if ret == 0 {
593             Ok(Regs::from(&regs))
594         } else {
595             errno_result()
596         }
597     }
598 
set_regs(&self, regs: &Regs) -> Result<()>599     fn set_regs(&self, regs: &Regs) -> Result<()> {
600         let regs = kvm_regs::from(regs);
601         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
602         // correct amount of memory from our pointer, and we verify the return result.
603         let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), &regs) };
604         if ret == 0 {
605             Ok(())
606         } else {
607             errno_result()
608         }
609     }
610 
get_sregs(&self) -> Result<Sregs>611     fn get_sregs(&self) -> Result<Sregs> {
612         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
613         // correct amount of memory to our pointer, and we verify the return result.
614         let mut regs: kvm_sregs = Default::default();
615         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
616         if ret == 0 {
617             Ok(Sregs::from(&regs))
618         } else {
619             errno_result()
620         }
621     }
622 
set_sregs(&self, sregs: &Sregs) -> Result<()>623     fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
624         // Get the current `kvm_sregs` so we can use its `apic_base` and `interrupt_bitmap`, which
625         // are not present in `Sregs`.
626         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
627         // correct amount of memory to our pointer, and we verify the return result.
628         let mut kvm_sregs: kvm_sregs = Default::default();
629         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut kvm_sregs) };
630         if ret != 0 {
631             return errno_result();
632         }
633 
634         kvm_sregs.cs = kvm_segment::from(&sregs.cs);
635         kvm_sregs.ds = kvm_segment::from(&sregs.ds);
636         kvm_sregs.es = kvm_segment::from(&sregs.es);
637         kvm_sregs.fs = kvm_segment::from(&sregs.fs);
638         kvm_sregs.gs = kvm_segment::from(&sregs.gs);
639         kvm_sregs.ss = kvm_segment::from(&sregs.ss);
640         kvm_sregs.tr = kvm_segment::from(&sregs.tr);
641         kvm_sregs.ldt = kvm_segment::from(&sregs.ldt);
642         kvm_sregs.gdt = kvm_dtable::from(&sregs.gdt);
643         kvm_sregs.idt = kvm_dtable::from(&sregs.idt);
644         kvm_sregs.cr0 = sregs.cr0;
645         kvm_sregs.cr2 = sregs.cr2;
646         kvm_sregs.cr3 = sregs.cr3;
647         kvm_sregs.cr4 = sregs.cr4;
648         kvm_sregs.cr8 = sregs.cr8;
649         kvm_sregs.efer = sregs.efer;
650 
651         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
652         // correct amount of memory from our pointer, and we verify the return result.
653         let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), &kvm_sregs) };
654         if ret == 0 {
655             Ok(())
656         } else {
657             errno_result()
658         }
659     }
660 
get_fpu(&self) -> Result<Fpu>661     fn get_fpu(&self) -> Result<Fpu> {
662         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
663         // correct amount of memory to our pointer, and we verify the return result.
664         let mut fpu: kvm_fpu = Default::default();
665         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) };
666         if ret == 0 {
667             Ok(Fpu::from(&fpu))
668         } else {
669             errno_result()
670         }
671     }
672 
set_fpu(&self, fpu: &Fpu) -> Result<()>673     fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
674         let fpu = kvm_fpu::from(fpu);
675         let ret = unsafe {
676             // Here we trust the kernel not to read past the end of the kvm_fpu struct.
677             ioctl_with_ref(self, KVM_SET_FPU(), &fpu)
678         };
679         if ret == 0 {
680             Ok(())
681         } else {
682             errno_result()
683         }
684     }
685 
686     /// If the VM reports using XSave2, the function will call XSave2.
get_xsave(&self) -> Result<Xsave>687     fn get_xsave(&self) -> Result<Xsave> {
688         // Safe because we know that our file is a VM fd, we know that the
689         // kernel will only read correct amount of memory from our pointer, and
690         // we verify the return result.
691         // Get the size of Xsave in bytes. Values are of type u32.
692         let size =
693             unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION(), KVM_CAP_XSAVE2 as u64) };
694         if size < 0 {
695             return errno_result();
696         }
697         // Size / sizeof(u32) = len of vec.
698         let mut xsave: Vec<u32> = vec![0u32; size as usize / size_of::<u32>()];
699         let ioctl_nr = if size > KVM_XSAVE_MAX_SIZE {
700             KVM_GET_XSAVE2()
701         } else {
702             KVM_GET_XSAVE()
703         };
704         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
705         // correct amount of memory to our pointer, and we verify the return result.
706         let ret = unsafe { ioctl_with_mut_ptr(self, ioctl_nr, xsave.as_mut_ptr()) };
707         if ret == 0 {
708             Ok(Xsave::from(xsave))
709         } else {
710             errno_result()
711         }
712     }
713 
set_xsave(&self, xsave: &Xsave) -> Result<()>714     fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
715         // Safe because we know that our file is a VM fd, we know that the
716         // kernel will only read correct amount of memory from our pointer, and
717         // get size from KVM_CAP_XSAVE2. Will return at least 4096 as a value if XSAVE2 is not
718         // supported or if no extensions are enabled. Otherwise it will return a value higher than
719         // 4096.
720         let size =
721             unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION(), KVM_CAP_XSAVE2 as u64) };
722         if size < 0 {
723             return errno_result();
724         }
725         // Ensure xsave is the same size as used in get_xsave.
726         // Return err if sizes don't match => not the same extensions are enabled for CPU.
727         if xsave.0.len() != size as usize / size_of::<u32>() {
728             return Err(Error::new(EIO));
729         }
730 
731         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
732         // correct amount of memory to our pointer, and we verify the return result.
733         // Because of the len check above, and because the layout of `struct kvm_xsave` is
734         // compatible with a slice of `u32`, we can pass the pointer to `xsave` directly.
735         let ret = unsafe { ioctl_with_ptr(self, KVM_SET_XSAVE(), xsave.0.as_ptr()) };
736         if ret == 0 {
737             Ok(())
738         } else {
739             errno_result()
740         }
741     }
742 
    fn get_vcpu_events(&self) -> Result<VcpuEvents> {
        let mut vcpu_evts: kvm_vcpu_events = Default::default();
        // SAFETY: self is a valid VCPU fd; the kernel writes only a
        // kvm_vcpu_events struct to our pointer, and we verify the return
        // result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_evts) };
        if ret == 0 {
            Ok(VcpuEvents::from(&vcpu_evts))
        } else {
            errno_result()
        }
    }
752 
    fn set_vcpu_events(&self, vcpu_evts: &VcpuEvents) -> Result<()> {
        let vcpu_events = kvm_vcpu_events::from(vcpu_evts);
        // SAFETY: self is a valid VCPU fd; the kernel only reads the
        // kvm_vcpu_events struct, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), &vcpu_events) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
762 
    fn get_debugregs(&self) -> Result<DebugRegs> {
        // Fetch the x86 debug registers (DB0-DB3, DR6, DR7) from the kernel.
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs: kvm_debugregs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut regs) };
        if ret == 0 {
            Ok(DebugRegs::from(&regs))
        } else {
            errno_result()
        }
    }
774 
    fn set_debugregs(&self, dregs: &DebugRegs) -> Result<()> {
        // Translate to KVM's layout, then hand the struct to the kernel.
        let dregs = kvm_debugregs::from(dregs);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_debugregs struct.
            ioctl_with_ref(self, KVM_SET_DEBUGREGS(), &dregs)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
787 
    fn get_xcrs(&self) -> Result<Vec<Register>> {
        // Read the extended control registers (XCRs) as id/value pairs.
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs: kvm_xcrs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut regs) };
        if ret == 0 {
            Ok(from_kvm_xcrs(&regs))
        } else {
            errno_result()
        }
    }
799 
    fn set_xcrs(&self, xcrs: &[Register]) -> Result<()> {
        // Pack the id/value pairs into KVM's fixed-size kvm_xcrs struct.
        let xcrs = to_kvm_xcrs(xcrs);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
            ioctl_with_ref(self, KVM_SET_XCRS(), &xcrs)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
812 
    fn get_msrs(&self, vec: &mut Vec<Register>) -> Result<()> {
        // `vec` is an in/out parameter: on entry each Register's `id` names
        // an MSR to read; on success `vec` is replaced with the id/value
        // pairs the kernel actually returned, which may be fewer than
        // requested.
        let msrs = to_kvm_msrs(vec);
        let ret = unsafe {
            // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_GET_MSRS(), &msrs[0])
        };
        // KVM_GET_MSRS actually returns the number of msr entries written.
        if ret < 0 {
            return errno_result();
        }
        // Safe because we trust the kernel to return the correct array length on success.
        let entries = unsafe {
            let count = ret as usize;
            assert!(count <= vec.len());
            msrs[0].entries.as_slice(count)
        };
        // Replace the request list with the kernel's results.
        vec.truncate(0);
        vec.extend(entries.iter().map(|e| Register {
            id: e.index,
            value: e.data,
        }));
        Ok(())
    }
836 
get_all_msrs(&self) -> Result<Vec<Register>>837     fn get_all_msrs(&self) -> Result<Vec<Register>> {
838         let mut msrs = self
839             .kvm
840             .get_msr_index_list()?
841             .into_iter()
842             .map(|i| Register { id: i, value: 0 })
843             .collect();
844         self.get_msrs(&mut msrs)?;
845         Ok(msrs)
846     }
847 
    fn set_msrs(&self, vec: &[Register]) -> Result<()> {
        let msrs = to_kvm_msrs(vec);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_SET_MSRS(), &msrs[0])
        };
        // KVM_SET_MSRS actually returns the number of msr entries written.
        // Any non-negative count is treated as success here, even if fewer
        // than all requested entries were written.
        if ret >= 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
861 
    fn set_cpuid(&self, cpuid: &CpuId) -> Result<()> {
        // Convert to KVM's kvm_cpuid2 wire format before issuing the ioctl.
        let cpuid = KvmCpuId::from(cpuid);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
            ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_ptr())
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
874 
    fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        // Initial capacity hint for get_cpuid_with_initial_capacity;
        // presumably the helper grows the buffer if the kernel reports E2BIG
        // — see its definition.
        const KVM_MAX_ENTRIES: usize = 256;
        get_cpuid_with_initial_capacity(self, KVM_GET_SUPPORTED_HV_CPUID(), KVM_MAX_ENTRIES)
    }
879 
    fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()> {
        use kvm_sys::*;
        let mut dbg: kvm_guest_debug = Default::default();

        // x86 only provides four hardware breakpoint registers (DR0-DR3).
        if addrs.len() > 4 {
            error!(
                "Support 4 breakpoints at most but {} addresses are passed",
                addrs.len()
            );
            return Err(base::Error::new(libc::EINVAL));
        }

        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        if enable_singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set bits 9 and 10.
        // bit 9: GE (global exact breakpoint enable) flag.
        // bit 10: always 1.
        dbg.arch.debugreg[7] = 0x0600;

        for (i, addr) in addrs.iter().enumerate() {
            // DR0-DR3 hold the breakpoint addresses.
            dbg.arch.debugreg[i] = addr.0;
            // Set global breakpoint enable flag: G0-G3 are the odd-numbered
            // bits (1, 3, 5, 7) of DR7.
            dbg.arch.debugreg[7] |= 2 << (i * 2);
        }

        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_guest_debug struct.
            ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), &dbg)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
918 
    /// KVM does not support the VcpuExit::Cpuid exit type.
    fn handle_cpuid(&mut self, _entry: &CpuIdEntry) -> Result<()> {
        // Nothing to complete: this exit type is never produced by KVM (see
        // the doc comment above), so the call is always an error.
        Err(Error::new(ENXIO))
    }
923 
    fn get_tsc_offset(&self) -> Result<u64> {
        // Use the default MSR-based implementation shared across hypervisor
        // backends (crate::get_tsc_offset_from_msr).
        get_tsc_offset_from_msr(self)
    }
928 
    fn set_tsc_offset(&self, offset: u64) -> Result<()> {
        // Use the default MSR-based implementation shared across hypervisor
        // backends (crate::set_tsc_offset_via_msr).
        set_tsc_offset_via_msr(self, offset)
    }
933 }
934 
impl KvmVcpu {
    /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt Controller".
    ///
    /// See the documentation for KVM_GET_LAPIC.
    ///
    /// Returns the last OS error if the ioctl fails.
    pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
        let mut klapic: kvm_lapic_state = Default::default();

        let ret = unsafe {
            // The ioctl is unsafe unless you trust the kernel not to write past the end of the
            // local_apic struct.
            ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(klapic)
    }

    /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt Controller".
    ///
    /// See the documentation for KVM_SET_LAPIC.
    ///
    /// Returns the last OS error if the ioctl fails.
    pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
        let ret = unsafe {
            // The ioctl is safe because the kernel will only read from the klapic struct.
            ioctl_with_ref(self, KVM_SET_LAPIC(), klapic)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}
967 
968 impl<'a> From<&'a KvmCpuId> for CpuId {
from(kvm_cpuid: &'a KvmCpuId) -> CpuId969     fn from(kvm_cpuid: &'a KvmCpuId) -> CpuId {
970         let kvm_entries = kvm_cpuid.entries_slice();
971         let mut cpu_id_entries = Vec::with_capacity(kvm_entries.len());
972 
973         for entry in kvm_entries {
974             let cpu_id_entry = CpuIdEntry {
975                 function: entry.function,
976                 index: entry.index,
977                 flags: entry.flags,
978                 cpuid: CpuidResult {
979                     eax: entry.eax,
980                     ebx: entry.ebx,
981                     ecx: entry.ecx,
982                     edx: entry.edx,
983                 },
984             };
985             cpu_id_entries.push(cpu_id_entry)
986         }
987         CpuId { cpu_id_entries }
988     }
989 }
990 
991 impl From<&CpuId> for KvmCpuId {
from(cpuid: &CpuId) -> KvmCpuId992     fn from(cpuid: &CpuId) -> KvmCpuId {
993         let mut kvm = KvmCpuId::new(cpuid.cpu_id_entries.len());
994         let entries = kvm.mut_entries_slice();
995         for (i, &e) in cpuid.cpu_id_entries.iter().enumerate() {
996             entries[i] = kvm_cpuid_entry2 {
997                 function: e.function,
998                 index: e.index,
999                 flags: e.flags,
1000                 eax: e.cpuid.eax,
1001                 ebx: e.cpuid.ebx,
1002                 ecx: e.cpuid.ecx,
1003                 edx: e.cpuid.edx,
1004                 ..Default::default()
1005             };
1006         }
1007         kvm
1008     }
1009 }
1010 
1011 impl From<ClockState> for kvm_clock_data {
from(state: ClockState) -> Self1012     fn from(state: ClockState) -> Self {
1013         kvm_clock_data {
1014             clock: state.clock,
1015             flags: state.flags,
1016             ..Default::default()
1017         }
1018     }
1019 }
1020 
1021 impl From<kvm_clock_data> for ClockState {
from(clock_data: kvm_clock_data) -> Self1022     fn from(clock_data: kvm_clock_data) -> Self {
1023         ClockState {
1024             clock: clock_data.clock,
1025             flags: clock_data.flags,
1026         }
1027     }
1028 }
1029 
1030 impl From<&kvm_pic_state> for PicState {
from(item: &kvm_pic_state) -> Self1031     fn from(item: &kvm_pic_state) -> Self {
1032         PicState {
1033             last_irr: item.last_irr,
1034             irr: item.irr,
1035             imr: item.imr,
1036             isr: item.isr,
1037             priority_add: item.priority_add,
1038             irq_base: item.irq_base,
1039             read_reg_select: item.read_reg_select != 0,
1040             poll: item.poll != 0,
1041             special_mask: item.special_mask != 0,
1042             init_state: item.init_state.into(),
1043             auto_eoi: item.auto_eoi != 0,
1044             rotate_on_auto_eoi: item.rotate_on_auto_eoi != 0,
1045             special_fully_nested_mode: item.special_fully_nested_mode != 0,
1046             use_4_byte_icw: item.init4 != 0,
1047             elcr: item.elcr,
1048             elcr_mask: item.elcr_mask,
1049         }
1050     }
1051 }
1052 
1053 impl From<&PicState> for kvm_pic_state {
from(item: &PicState) -> Self1054     fn from(item: &PicState) -> Self {
1055         kvm_pic_state {
1056             last_irr: item.last_irr,
1057             irr: item.irr,
1058             imr: item.imr,
1059             isr: item.isr,
1060             priority_add: item.priority_add,
1061             irq_base: item.irq_base,
1062             read_reg_select: item.read_reg_select as u8,
1063             poll: item.poll as u8,
1064             special_mask: item.special_mask as u8,
1065             init_state: item.init_state as u8,
1066             auto_eoi: item.auto_eoi as u8,
1067             rotate_on_auto_eoi: item.rotate_on_auto_eoi as u8,
1068             special_fully_nested_mode: item.special_fully_nested_mode as u8,
1069             init4: item.use_4_byte_icw as u8,
1070             elcr: item.elcr,
1071             elcr_mask: item.elcr_mask,
1072         }
1073     }
1074 }
1075 
1076 impl From<&kvm_ioapic_state> for IoapicState {
from(item: &kvm_ioapic_state) -> Self1077     fn from(item: &kvm_ioapic_state) -> Self {
1078         let mut state = IoapicState {
1079             base_address: item.base_address,
1080             ioregsel: item.ioregsel as u8,
1081             ioapicid: item.id,
1082             current_interrupt_level_bitmap: item.irr,
1083             redirect_table: [IoapicRedirectionTableEntry::default(); 120],
1084         };
1085         for (in_state, out_state) in item.redirtbl.iter().zip(state.redirect_table.iter_mut()) {
1086             *out_state = in_state.into();
1087         }
1088         state
1089     }
1090 }
1091 
impl From<&IoapicRedirectionTableEntry> for kvm_ioapic_state__bindgen_ty_1 {
    /// Packs a redirection-table entry into the raw 64-bit union KVM expects.
    fn from(item: &IoapicRedirectionTableEntry) -> Self {
        kvm_ioapic_state__bindgen_ty_1 {
            // IoapicRedirectionTableEntry layout matches the exact bit layout of a hardware
            // ioapic redirection table entry, so we can simply do a 64-bit copy
            bits: item.get(0, 64),
        }
    }
}
1101 
impl From<&kvm_ioapic_state__bindgen_ty_1> for IoapicRedirectionTableEntry {
    /// Unpacks KVM's raw 64-bit union into a redirection-table entry.
    fn from(item: &kvm_ioapic_state__bindgen_ty_1) -> Self {
        let mut entry = IoapicRedirectionTableEntry::default();
        // SAFETY: reading the `bits` union field is sound because the 64-bit
        // layout of the IoapicRedirectionTableEntry matches the kvm_sys
        // table entry layout.
        entry.set(0, 64, unsafe { item.bits as u64 });
        entry
    }
}
1111 
1112 impl From<&IoapicState> for kvm_ioapic_state {
from(item: &IoapicState) -> Self1113     fn from(item: &IoapicState) -> Self {
1114         let mut state = kvm_ioapic_state {
1115             base_address: item.base_address,
1116             ioregsel: item.ioregsel as u32,
1117             id: item.ioapicid,
1118             irr: item.current_interrupt_level_bitmap,
1119             ..Default::default()
1120         };
1121         for (in_state, out_state) in item.redirect_table.iter().zip(state.redirtbl.iter_mut()) {
1122             *out_state = in_state.into();
1123         }
1124         state
1125     }
1126 }
1127 
1128 impl From<&LapicState> for kvm_lapic_state {
from(item: &LapicState) -> Self1129     fn from(item: &LapicState) -> Self {
1130         let mut state = kvm_lapic_state::default();
1131         // There are 64 lapic registers
1132         for (reg, value) in item.regs.iter().enumerate() {
1133             // Each lapic register is 16 bytes, but only the first 4 are used
1134             let reg_offset = 16 * reg;
1135             let regs_slice = &mut state.regs[reg_offset..reg_offset + 4];
1136 
1137             // to_le_bytes() produces an array of u8, not i8(c_char), so we can't directly use
1138             // copy_from_slice().
1139             for (i, v) in value.to_le_bytes().iter().enumerate() {
1140                 regs_slice[i] = *v as i8;
1141             }
1142         }
1143         state
1144     }
1145 }
1146 
1147 impl From<&kvm_lapic_state> for LapicState {
from(item: &kvm_lapic_state) -> Self1148     fn from(item: &kvm_lapic_state) -> Self {
1149         let mut state = LapicState { regs: [0; 64] };
1150         // There are 64 lapic registers
1151         for reg in 0..64 {
1152             // Each lapic register is 16 bytes, but only the first 4 are used
1153             let reg_offset = 16 * reg;
1154 
1155             // from_le_bytes() only works on arrays of u8, not i8(c_char).
1156             let reg_slice = &item.regs[reg_offset..reg_offset + 4];
1157             let mut bytes = [0u8; 4];
1158             for i in 0..4 {
1159                 bytes[i] = reg_slice[i] as u8;
1160             }
1161             state.regs[reg] = u32::from_le_bytes(bytes);
1162         }
1163         state
1164     }
1165 }
1166 
1167 impl From<&PitState> for kvm_pit_state2 {
from(item: &PitState) -> Self1168     fn from(item: &PitState) -> Self {
1169         kvm_pit_state2 {
1170             channels: [
1171                 kvm_pit_channel_state::from(&item.channels[0]),
1172                 kvm_pit_channel_state::from(&item.channels[1]),
1173                 kvm_pit_channel_state::from(&item.channels[2]),
1174             ],
1175             flags: item.flags,
1176             ..Default::default()
1177         }
1178     }
1179 }
1180 
1181 impl From<&kvm_pit_state2> for PitState {
from(item: &kvm_pit_state2) -> Self1182     fn from(item: &kvm_pit_state2) -> Self {
1183         PitState {
1184             channels: [
1185                 PitChannelState::from(&item.channels[0]),
1186                 PitChannelState::from(&item.channels[1]),
1187                 PitChannelState::from(&item.channels[2]),
1188             ],
1189             flags: item.flags,
1190         }
1191     }
1192 }
1193 
impl From<&PitChannelState> for kvm_pit_channel_state {
    /// Converts a PIT channel to KVM's layout. Bool fields widen to u8 and
    /// the state fields are cast to their u8 discriminants (the reverse
    /// conversion restores them via `.into()`).
    fn from(item: &PitChannelState) -> Self {
        kvm_pit_channel_state {
            count: item.count,
            latched_count: item.latched_count,
            count_latched: item.count_latched as u8,
            status_latched: item.status_latched as u8,
            status: item.status,
            read_state: item.read_state as u8,
            write_state: item.write_state as u8,
            // kvm's write_latch only stores the low byte of the reload value
            write_latch: item.reload_value as u8,
            rw_mode: item.rw_mode as u8,
            mode: item.mode,
            bcd: item.bcd as u8,
            gate: item.gate as u8,
            // u64 -> i64 bit reinterpretation of the load timestamp.
            count_load_time: item.count_load_time as i64,
        }
    }
}
1214 
impl From<&kvm_pit_channel_state> for PitChannelState {
    /// Converts KVM's PIT channel to the generic layout. u8 fields become
    /// bools (non-zero => true) or are widened via `.into()`.
    fn from(item: &kvm_pit_channel_state) -> Self {
        PitChannelState {
            count: item.count,
            latched_count: item.latched_count,
            count_latched: item.count_latched.into(),
            status_latched: item.status_latched != 0,
            status: item.status,
            read_state: item.read_state.into(),
            write_state: item.write_state.into(),
            // kvm's write_latch only stores the low byte of the reload value
            reload_value: item.write_latch as u16,
            rw_mode: item.rw_mode.into(),
            mode: item.mode,
            bcd: item.bcd != 0,
            gate: item.gate != 0,
            // i64 -> u64 bit reinterpretation of the load timestamp.
            count_load_time: item.count_load_time as u64,
        }
    }
}
1235 
// This function translates an IrqSourceChip to the kvm u32 equivalent. It has a different
// implementation between x86_64 and aarch64 because the irqchip KVM constants are not defined on
// all architectures.
pub(super) fn chip_to_kvm_chip(chip: IrqSourceChip) -> u32 {
    match chip {
        IrqSourceChip::PicPrimary => KVM_IRQCHIP_PIC_MASTER,
        IrqSourceChip::PicSecondary => KVM_IRQCHIP_PIC_SLAVE,
        IrqSourceChip::Ioapic => KVM_IRQCHIP_IOAPIC,
        _ => {
            // Other chips are invalid for x86: log and fall back to chip id 0
            // rather than panicking.
            error!("Invalid IrqChipSource for X86 {:?}", chip);
            0
        }
    }
}
1250 
impl From<&kvm_regs> for Regs {
    /// Copies KVM's general-purpose register block into the
    /// hypervisor-agnostic `Regs` structure, field for field.
    fn from(r: &kvm_regs) -> Self {
        Regs {
            rax: r.rax,
            rbx: r.rbx,
            rcx: r.rcx,
            rdx: r.rdx,
            rsi: r.rsi,
            rdi: r.rdi,
            rsp: r.rsp,
            rbp: r.rbp,
            r8: r.r8,
            r9: r.r9,
            r10: r.r10,
            r11: r.r11,
            r12: r.r12,
            r13: r.r13,
            r14: r.r14,
            r15: r.r15,
            rip: r.rip,
            rflags: r.rflags,
        }
    }
}
1275 
impl From<&Regs> for kvm_regs {
    /// Copies the hypervisor-agnostic general-purpose registers into KVM's
    /// `kvm_regs` structure, field for field.
    fn from(r: &Regs) -> Self {
        kvm_regs {
            rax: r.rax,
            rbx: r.rbx,
            rcx: r.rcx,
            rdx: r.rdx,
            rsi: r.rsi,
            rdi: r.rdi,
            rsp: r.rsp,
            rbp: r.rbp,
            r8: r.r8,
            r9: r.r9,
            r10: r.r10,
            r11: r.r11,
            r12: r.r12,
            r13: r.r13,
            r14: r.r14,
            r15: r.r15,
            rip: r.rip,
            rflags: r.rflags,
        }
    }
}
1300 
impl From<&VcpuEvents> for kvm_vcpu_events {
    /// Serializes the generic vCPU event state into KVM's layout. Optional
    /// (`Some`) fields additionally set the matching KVM_VCPUEVENT_VALID_*
    /// flag bit so the kernel knows to consume them.
    fn from(ve: &VcpuEvents) -> Self {
        let mut kvm_ve: kvm_vcpu_events = Default::default();

        kvm_ve.exception.injected = ve.exception.injected as u8;
        kvm_ve.exception.nr = ve.exception.nr;
        kvm_ve.exception.has_error_code = ve.exception.has_error_code as u8;
        // `pending` and the exception payload are transferred together,
        // gated by a single KVM_VCPUEVENT_VALID_PAYLOAD flag.
        if let Some(pending) = ve.exception.pending {
            kvm_ve.exception.pending = pending as u8;
            if ve.exception_payload.is_some() {
                kvm_ve.exception_has_payload = true as u8;
            }
            kvm_ve.exception_payload = ve.exception_payload.unwrap_or(0);
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
        }
        kvm_ve.exception.error_code = ve.exception.error_code;

        kvm_ve.interrupt.injected = ve.interrupt.injected as u8;
        kvm_ve.interrupt.nr = ve.interrupt.nr;
        kvm_ve.interrupt.soft = ve.interrupt.soft as u8;
        if let Some(shadow) = ve.interrupt.shadow {
            kvm_ve.interrupt.shadow = shadow;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_SHADOW;
        }

        kvm_ve.nmi.injected = ve.nmi.injected as u8;
        if let Some(pending) = ve.nmi.pending {
            kvm_ve.nmi.pending = pending as u8;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
        }
        kvm_ve.nmi.masked = ve.nmi.masked as u8;

        if let Some(sipi_vector) = ve.sipi_vector {
            kvm_ve.sipi_vector = sipi_vector;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR;
        }

        if let Some(smm) = ve.smi.smm {
            kvm_ve.smi.smm = smm as u8;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_SMM;
        }
        kvm_ve.smi.pending = ve.smi.pending as u8;
        kvm_ve.smi.smm_inside_nmi = ve.smi.smm_inside_nmi as u8;
        kvm_ve.smi.latched_init = ve.smi.latched_init;

        if let Some(pending) = ve.triple_fault.pending {
            kvm_ve.triple_fault.pending = pending as u8;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
        }
        kvm_ve
    }
}
1353 
1354 impl From<&kvm_vcpu_events> for VcpuEvents {
from(ve: &kvm_vcpu_events) -> Self1355     fn from(ve: &kvm_vcpu_events) -> Self {
1356         let exception = VcpuExceptionState {
1357             injected: ve.exception.injected != 0,
1358             nr: ve.exception.nr,
1359             has_error_code: ve.exception.has_error_code != 0,
1360             pending: if ve.flags & KVM_VCPUEVENT_VALID_PAYLOAD != 0 {
1361                 Some(ve.exception.pending != 0)
1362             } else {
1363                 None
1364             },
1365             error_code: ve.exception.error_code,
1366         };
1367 
1368         let interrupt = VcpuInterruptState {
1369             injected: ve.interrupt.injected != 0,
1370             nr: ve.interrupt.nr,
1371             soft: ve.interrupt.soft != 0,
1372             shadow: if ve.flags & KVM_VCPUEVENT_VALID_SHADOW != 0 {
1373                 Some(ve.interrupt.shadow)
1374             } else {
1375                 None
1376             },
1377         };
1378 
1379         let nmi = VcpuNmiState {
1380             injected: ve.interrupt.injected != 0,
1381             pending: if ve.flags & KVM_VCPUEVENT_VALID_NMI_PENDING != 0 {
1382                 Some(ve.nmi.pending != 0)
1383             } else {
1384                 None
1385             },
1386             masked: ve.nmi.masked != 0,
1387         };
1388 
1389         let sipi_vector = if ve.flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR != 0 {
1390             Some(ve.sipi_vector)
1391         } else {
1392             None
1393         };
1394 
1395         let smi = VcpuSmiState {
1396             smm: if ve.flags & KVM_VCPUEVENT_VALID_SMM != 0 {
1397                 Some(ve.smi.smm != 0)
1398             } else {
1399                 None
1400             },
1401             pending: ve.smi.pending != 0,
1402             smm_inside_nmi: ve.smi.smm_inside_nmi != 0,
1403             latched_init: ve.smi.latched_init,
1404         };
1405 
1406         let triple_fault = VcpuTripleFaultState {
1407             pending: if ve.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT != 0 {
1408                 Some(ve.triple_fault.pending != 0)
1409             } else {
1410                 None
1411             },
1412         };
1413 
1414         let exception_payload = if ve.flags & KVM_VCPUEVENT_VALID_PAYLOAD != 0 {
1415             Some(ve.exception_payload)
1416         } else {
1417             None
1418         };
1419 
1420         VcpuEvents {
1421             exception,
1422             interrupt,
1423             nmi,
1424             sipi_vector,
1425             smi,
1426             triple_fault,
1427             exception_payload,
1428         }
1429     }
1430 }
1431 
impl From<&kvm_segment> for Segment {
    /// Copies KVM's segment descriptor into the hypervisor-agnostic
    /// `Segment`; KVM's `unusable` and padding fields are dropped.
    fn from(s: &kvm_segment) -> Self {
        Segment {
            base: s.base,
            limit: s.limit,
            selector: s.selector,
            type_: s.type_,
            present: s.present,
            dpl: s.dpl,
            db: s.db,
            s: s.s,
            l: s.l,
            g: s.g,
            avl: s.avl,
        }
    }
}
1449 
1450 impl From<&Segment> for kvm_segment {
from(s: &Segment) -> Self1451     fn from(s: &Segment) -> Self {
1452         kvm_segment {
1453             base: s.base,
1454             limit: s.limit,
1455             selector: s.selector,
1456             type_: s.type_,
1457             present: s.present,
1458             dpl: s.dpl,
1459             db: s.db,
1460             s: s.s,
1461             l: s.l,
1462             g: s.g,
1463             avl: s.avl,
1464             unusable: match s.present {
1465                 0 => 1,
1466                 _ => 0,
1467             },
1468             ..Default::default()
1469         }
1470     }
1471 }
1472 
1473 impl From<&kvm_dtable> for DescriptorTable {
from(dt: &kvm_dtable) -> Self1474     fn from(dt: &kvm_dtable) -> Self {
1475         DescriptorTable {
1476             base: dt.base,
1477             limit: dt.limit,
1478         }
1479     }
1480 }
1481 
1482 impl From<&DescriptorTable> for kvm_dtable {
from(dt: &DescriptorTable) -> Self1483     fn from(dt: &DescriptorTable) -> Self {
1484         kvm_dtable {
1485             base: dt.base,
1486             limit: dt.limit,
1487             ..Default::default()
1488         }
1489     }
1490 }
1491 
impl From<&kvm_sregs> for Sregs {
    /// Converts KVM's special-register block: segment and descriptor-table
    /// fields go through their own `From` impls, control registers are
    /// copied directly.
    fn from(r: &kvm_sregs) -> Self {
        Sregs {
            cs: Segment::from(&r.cs),
            ds: Segment::from(&r.ds),
            es: Segment::from(&r.es),
            fs: Segment::from(&r.fs),
            gs: Segment::from(&r.gs),
            ss: Segment::from(&r.ss),
            tr: Segment::from(&r.tr),
            ldt: Segment::from(&r.ldt),
            gdt: DescriptorTable::from(&r.gdt),
            idt: DescriptorTable::from(&r.idt),
            cr0: r.cr0,
            cr2: r.cr2,
            cr3: r.cr3,
            cr4: r.cr4,
            cr8: r.cr8,
            efer: r.efer,
        }
    }
}
1514 
impl From<&kvm_fpu> for Fpu {
    /// Copies KVM's FPU/SSE state into the hypervisor-agnostic `Fpu`,
    /// field for field.
    fn from(r: &kvm_fpu) -> Self {
        Fpu {
            fpr: r.fpr,
            fcw: r.fcw,
            fsw: r.fsw,
            ftwx: r.ftwx,
            last_opcode: r.last_opcode,
            last_ip: r.last_ip,
            last_dp: r.last_dp,
            xmm: r.xmm,
            mxcsr: r.mxcsr,
        }
    }
}
1530 
impl From<&Fpu> for kvm_fpu {
    /// Copies the hypervisor-agnostic FPU/SSE state into KVM's `kvm_fpu`;
    /// KVM's remaining fields stay defaulted.
    fn from(r: &Fpu) -> Self {
        kvm_fpu {
            fpr: r.fpr,
            fcw: r.fcw,
            fsw: r.fsw,
            ftwx: r.ftwx,
            last_opcode: r.last_opcode,
            last_ip: r.last_ip,
            last_dp: r.last_dp,
            xmm: r.xmm,
            mxcsr: r.mxcsr,
            ..Default::default()
        }
    }
}
1547 
impl Xsave {
    // Wraps the raw u32 buffer read back from the kernel.
    // NOTE(review): this is an inherent `from`, not `impl From<Vec<u32>>` —
    // presumably deliberate (other backends may define their own); confirm
    // before converting to the trait.
    fn from(r: Vec<u32>) -> Self {
        Xsave(r)
    }
}
1553 
1554 impl From<&kvm_debugregs> for DebugRegs {
from(r: &kvm_debugregs) -> Self1555     fn from(r: &kvm_debugregs) -> Self {
1556         DebugRegs {
1557             db: r.db,
1558             dr6: r.dr6,
1559             dr7: r.dr7,
1560         }
1561     }
1562 }
1563 
1564 impl From<&DebugRegs> for kvm_debugregs {
from(r: &DebugRegs) -> Self1565     fn from(r: &DebugRegs) -> Self {
1566         kvm_debugregs {
1567             db: r.db,
1568             dr6: r.dr6,
1569             dr7: r.dr7,
1570             ..Default::default()
1571         }
1572     }
1573 }
1574 
from_kvm_xcrs(r: &kvm_xcrs) -> Vec<Register>1575 fn from_kvm_xcrs(r: &kvm_xcrs) -> Vec<Register> {
1576     r.xcrs
1577         .iter()
1578         .take(r.nr_xcrs as usize)
1579         .map(|x| Register {
1580             id: x.xcr,
1581             value: x.value,
1582         })
1583         .collect()
1584 }
1585 
to_kvm_xcrs(r: &[Register]) -> kvm_xcrs1586 fn to_kvm_xcrs(r: &[Register]) -> kvm_xcrs {
1587     let mut kvm = kvm_xcrs {
1588         nr_xcrs: r.len() as u32,
1589         ..Default::default()
1590     };
1591     for (i, &xcr) in r.iter().enumerate() {
1592         kvm.xcrs[i].xcr = xcr.id as u32;
1593         kvm.xcrs[i].value = xcr.value;
1594     }
1595     kvm
1596 }
1597 
/// Converts a slice of generic `Register`s into KVM's variable-length
/// `kvm_msrs` structure, returned inside a `Vec` that owns the storage for the
/// trailing entry array. Only element 0 of the returned `Vec` is meaningful;
/// the extra elements exist purely to reserve space for the entries.
fn to_kvm_msrs(vec: &[Register]) -> Vec<kvm_msrs> {
    // Re-encode each register as a `kvm_msr_entry`; `id` is truncated to the
    // u32 MSR index KVM expects.
    let vec: Vec<kvm_msr_entry> = vec
        .iter()
        .map(|e| kvm_msr_entry {
            index: e.id as u32,
            data: e.value,
            ..Default::default()
        })
        .collect();

    // `kvm_msrs` ends in a flexible array member, so allocate a Vec sized for
    // the header plus `vec.len()` trailing `kvm_msr_entry` elements.
    let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(vec.len());
    unsafe {
        // SAFETY: Mapping the unsized array to a slice is unsafe because the length isn't
        // known. Providing the length used to create the struct guarantees the entire
        // slice is valid.
        msrs[0]
            .entries
            .as_mut_slice(vec.len())
            .copy_from_slice(&vec);
    }
    msrs[0].nmsrs = vec.len() as u32;
    msrs
}
1620 
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips a fully populated `kvm_vcpu_events` through the
    /// arch-neutral `VcpuEvents` and back, checking every field survives.
    #[test]
    fn vcpu_event_to_from() {
        // All data is random.
        let mut src: kvm_vcpu_events = Default::default();
        src.exception.injected = 1;
        src.exception.nr = 65;
        src.exception.has_error_code = 1;
        src.exception.error_code = 110;
        src.exception.pending = 1;

        src.interrupt.injected = 1;
        src.interrupt.nr = 100;
        src.interrupt.soft = 1;
        src.interrupt.shadow = 114;

        src.nmi.injected = 1;
        src.nmi.pending = 1;
        src.nmi.masked = 0;

        src.sipi_vector = 105;

        src.smi.smm = 1;
        src.smi.pending = 1;
        src.smi.smm_inside_nmi = 1;
        src.smi.latched_init = 100;

        src.triple_fault.pending = 0;

        src.exception_payload = 33;
        src.exception_has_payload = 1;

        src.flags = KVM_VCPUEVENT_VALID_PAYLOAD
            | KVM_VCPUEVENT_VALID_SMM
            | KVM_VCPUEVENT_VALID_NMI_PENDING
            | KVM_VCPUEVENT_VALID_SIPI_VECTOR
            | KVM_VCPUEVENT_VALID_SHADOW;

        // KVM layout -> arch-neutral representation.
        let ve = VcpuEvents::from(&src);
        assert!(ve.exception.injected);
        assert_eq!(ve.exception.nr, 65);
        assert!(ve.exception.has_error_code);
        assert_eq!(ve.exception.error_code, 110);
        assert_eq!(ve.exception.pending, Some(true));

        assert!(ve.interrupt.injected);
        assert_eq!(ve.interrupt.nr, 100);
        assert!(ve.interrupt.soft);
        assert_eq!(ve.interrupt.shadow, Some(114));

        assert!(ve.nmi.injected);
        assert_eq!(ve.nmi.pending, Some(true));
        assert!(!ve.nmi.masked);

        assert_eq!(ve.sipi_vector, Some(105));

        assert_eq!(ve.smi.smm, Some(true));
        assert!(ve.smi.pending);
        assert!(ve.smi.smm_inside_nmi);
        assert_eq!(ve.smi.latched_init, 100);

        assert!(ve.triple_fault.pending.is_none());

        assert_eq!(ve.exception_payload, Some(33));

        // Arch-neutral representation -> KVM layout again.
        let round_trip = kvm_vcpu_events::from(&ve);
        assert_eq!(round_trip.exception.injected, 1);
        assert_eq!(round_trip.exception.nr, 65);
        assert_eq!(round_trip.exception.has_error_code, 1);
        assert_eq!(round_trip.exception.error_code, 110);
        assert_eq!(round_trip.exception.pending, 1);

        assert_eq!(round_trip.interrupt.injected, 1);
        assert_eq!(round_trip.interrupt.nr, 100);
        assert_eq!(round_trip.interrupt.soft, 1);
        assert_eq!(round_trip.interrupt.shadow, 114);

        assert_eq!(round_trip.nmi.injected, 1);
        assert_eq!(round_trip.nmi.pending, 1);
        assert_eq!(round_trip.nmi.masked, 0);

        assert_eq!(round_trip.sipi_vector, 105);

        assert_eq!(round_trip.smi.smm, 1);
        assert_eq!(round_trip.smi.pending, 1);
        assert_eq!(round_trip.smi.smm_inside_nmi, 1);
        assert_eq!(round_trip.smi.latched_init, 100);

        assert_eq!(round_trip.triple_fault.pending, 0);

        assert_eq!(round_trip.exception_payload, 33);
        assert_eq!(round_trip.exception_has_payload, 1);
    }
}
1719