// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::arch::x86_64::CpuidResult;
use std::mem::size_of;

use base::errno_result;
use base::error;
use base::ioctl;
use base::ioctl_with_mut_ptr;
use base::ioctl_with_mut_ref;
use base::ioctl_with_ptr;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::AsRawDescriptor;
use base::Error;
use base::IoctlNr;
use base::MappedRegion;
use base::Result;
use data_model::vec_with_array_field;
use kvm_sys::*;
use libc::E2BIG;
use libc::EIO;
use libc::ENXIO;
use vm_memory::GuestAddress;

use super::Config;
use super::Kvm;
use super::KvmVcpu;
use super::KvmVm;
use crate::get_tsc_offset_from_msr;
use crate::host_phys_addr_bits;
use crate::set_tsc_offset_via_msr;
use crate::ClockState;
use crate::CpuId;
use crate::CpuIdEntry;
use crate::DebugRegs;
use crate::DescriptorTable;
use crate::DeviceKind;
use crate::Fpu;
use crate::HypervisorX86_64;
use crate::IoapicRedirectionTableEntry;
use crate::IoapicState;
use crate::IrqSourceChip;
use crate::LapicState;
use crate::PicSelect;
use crate::PicState;
use crate::PitChannelState;
use crate::PitState;
use crate::ProtectionType;
use crate::Register;
use crate::Regs;
use crate::Segment;
use crate::Sregs;
use crate::VcpuEvents;
use crate::VcpuExceptionState;
use crate::VcpuExit;
use crate::VcpuInterruptState;
use crate::VcpuNmiState;
use crate::VcpuSmiState;
use crate::VcpuTripleFaultState;
use crate::VcpuX86_64;
use crate::VmCap;
use crate::VmX86_64;
use crate::Xsave;
use crate::MAX_IOAPIC_PINS;
use crate::NUM_IOAPIC_PINS;

type KvmCpuId = kvm::CpuId;
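/// Size in bytes of the legacy `kvm_xsave` buffer; if `KVM_CAP_XSAVE2` reports a larger size,
/// the state must be transferred with `KVM_GET_XSAVE2` (see `get_xsave` below).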
const KVM_XSAVE_MAX_SIZE: i32 = 4096;

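/// Retrieves CPUID information using the ioctl specified by `kind` (for example,
/// `KVM_GET_SUPPORTED_CPUID`), starting with room for `initial_capacity` entries and doubling
/// the allocation each time the kernel reports `E2BIG`, until the full list fits.
///
/// A usage sketch (illustrative only; assumes a `Kvm` handle):
///
/// ```ignore
/// let kvm = Kvm::new()?;
/// let cpuid = get_cpuid_with_initial_capacity(&kvm, KVM_GET_SUPPORTED_CPUID(), 4)?;
/// ```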
pub fn get_cpuid_with_initial_capacity<T: AsRawDescriptor>(
    descriptor: &T,
    kind: IoctlNr,
    initial_capacity: usize,
) -> Result<CpuId> {
    let mut entries: usize = initial_capacity;

    loop {
        let mut kvm_cpuid = KvmCpuId::new(entries);

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the
            // memory allocated for the struct. The limit is read from nent within KvmCpuId,
            // which is set to the allocated size above.
            ioctl_with_mut_ptr(descriptor, kind, kvm_cpuid.as_mut_ptr())
        };
        if ret < 0 {
            let err = Error::last();
            match err.errno() {
                E2BIG => {
                    // Double the available memory for cpuid entries for kvm.
                    if let Some(val) = entries.checked_mul(2) {
                        entries = val;
                    } else {
                        return Err(err);
                    }
                }
                _ => return Err(err),
            }
        } else {
            return Ok(CpuId::from(&kvm_cpuid));
        }
    }
}

impl Kvm {
    pub fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const KVM_MAX_ENTRIES: usize = 256;
        get_cpuid_with_initial_capacity(self, kind, KVM_MAX_ENTRIES)
    }

    /// The x86 machine type is always 0. Protected VMs are not supported.
    pub fn get_vm_type(&self, protection_type: ProtectionType) -> Result<u32> {
        if protection_type == ProtectionType::Unprotected {
            Ok(0)
        } else {
            error!("Protected mode is not supported on x86_64.");
            Err(Error::new(libc::EINVAL))
        }
    }

    /// Get the size of guest physical addresses in bits.
    pub fn get_guest_phys_addr_bits(&self) -> u8 {
        // Assume the guest physical address size is the same as the host.
        host_phys_addr_bits()
    }
}

impl HypervisorX86_64 for Kvm {
    fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID())
    }

    fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID())
    }

    fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nmsrs, which is set to the
            // allocated size (MAX_KVM_MSR_ENTRIES) above.
            ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST(), &mut msr_list[0])
        };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // Mapping the unsized array to a slice is unsafe because the length isn't known. Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }
}

impl KvmVm {
    /// Does platform-specific initialization for the KvmVm.
    pub fn init_arch(&self, _cfg: &Config) -> Result<()> {
        Ok(())
    }

    /// Checks if a particular `VmCap` is available, or returns None if the arch-independent
    /// `Vm::check_capability()` should handle the check.
    pub fn check_capability_arch(&self, c: VmCap) -> Option<bool> {
        match c {
            VmCap::PvClock => Some(true),
            _ => None,
        }
    }

    /// Returns the params to pass to KVM_CREATE_DEVICE for a `kind` device on this arch, or None
    /// to let the arch-independent `KvmVm::create_device` handle it.
    pub fn get_device_params_arch(&self, _kind: DeviceKind) -> Option<kvm_create_device> {
        None
    }

    /// Arch-specific implementation of `Vm::get_pvclock`.
    pub fn get_pvclock_arch(&self) -> Result<ClockState> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut clock_data: kvm_clock_data = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) };
        if ret == 0 {
            Ok(ClockState::from(clock_data))
        } else {
            errno_result()
        }
    }

    /// Arch-specific implementation of `Vm::set_pvclock`.
    pub fn set_pvclock_arch(&self, state: &ClockState) -> Result<()> {
        let clock_data = kvm_clock_data::from(*state);
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), &clock_data) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the given interrupt controller by issuing the KVM_GET_IRQCHIP
    /// ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
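    ///
    /// Illustrative sketch: `let pic_state = vm.get_pic_state(PicSelect::Primary)?;`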
    pub fn get_pic_state(&self, id: PicSelect) -> Result<kvm_pic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write the
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not the IOAPIC.
                irqchip_state.chip.pic
            })
        } else {
            errno_result()
        }
    }

    /// Sets the state of the given interrupt controller by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn set_pic_state(&self, id: PicSelect, state: &kvm_pic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        irqchip_state.chip.pic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the KVM_IOAPIC_NUM_PINS value for the emulated IO-APIC.
    pub fn get_ioapic_num_pins(&self) -> Result<usize> {
        // Safe because we know that our file is a KVM fd, and if the cap is invalid KVM assumes
        // it's an unavailable extension and returns 0, producing the default KVM_IOAPIC_NUM_PINS
        // value.
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), KVM_CAP_IOAPIC_NUM_PINS as u64) }
        {
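            // A non-negative return is the pin count reported by KVM; clamp it to the
            // supported range [NUM_IOAPIC_PINS, MAX_IOAPIC_PINS].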
            ret if ret < 0 => errno_result(),
            ret => Ok((ret as usize).max(NUM_IOAPIC_PINS).min(MAX_IOAPIC_PINS)),
        }
    }

    /// Retrieves the state of the IOAPIC by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write the
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not the PIC.
                irqchip_state.chip.ioapic
            })
        } else {
            errno_result()
        }
    }

    /// Sets the state of the IOAPIC by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        irqchip_state.chip.ioapic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    pub fn create_pit(&self) -> Result<()> {
        let pit_config = kvm_pit_config::default();
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the PIT by issuing the KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut pit_state = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }

    /// Sets the state of the PIT by issuing the KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Enables userspace MSR handling: RDMSR/WRMSR accesses that KVM considers unknown or
    /// invalid, or that match the configured MSR filter, will exit to userspace.
    pub fn enable_userspace_msr(&self) -> Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_X86_USER_SPACE_MSR,
            ..Default::default()
        };
        cap.args[0] = (KVM_MSR_EXIT_REASON_UNKNOWN
            | KVM_MSR_EXIT_REASON_INVAL
            | KVM_MSR_EXIT_REASON_FILTER) as u64;

        // Safe because we know that our file is a VM fd, we know that the
        // kernel will only read the correct amount of memory from our pointer,
        // and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), &cap) };
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }

    /// Sets MSR_PLATFORM_INFO read access.
    pub fn set_platform_info_read_access(&self, allow_read: bool) -> Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_MSR_PLATFORM_INFO,
            ..Default::default()
        };
        cap.args[0] = allow_read as u64;

        // Safe because we know that our file is a VM fd, we know that the
        // kernel will only read the correct amount of memory from our pointer,
        // and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), &cap) };
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }

    /// Sets the MSR filter. `msr_list` is a pair of (read, write) lists of MSR indices whose
    /// accesses should be filtered, causing an exit to userspace when userspace MSR handling is
    /// enabled.
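    ///
    /// Illustrative sketch (the MSR indices here are arbitrary placeholders):
    ///
    /// ```ignore
    /// // Exit to userspace on reads of MSR 0x10 and on writes of MSR 0x1b.
    /// vm.set_msr_filter((vec![0x10], vec![0x1b]))?;
    /// ```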
    pub fn set_msr_filter(&self, msr_list: (Vec<u32>, Vec<u32>)) -> Result<()> {
        let mut rd_nmsrs: u32 = 0;
        let mut wr_nmsrs: u32 = 0;
        let mut rd_msr_bitmap: [u8; KVM_MSR_FILTER_RANGE_MAX_BYTES] =
            [0xff; KVM_MSR_FILTER_RANGE_MAX_BYTES];
        let mut wr_msr_bitmap: [u8; KVM_MSR_FILTER_RANGE_MAX_BYTES] =
            [0xff; KVM_MSR_FILTER_RANGE_MAX_BYTES];
        let (rd_msrs, wr_msrs) = msr_list;

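        // Each MSR index maps to one bit of its bitmap: byte `index / 8`, bit `index % 8`.
        // The bitmaps start as all ones (allow in-kernel handling); clearing a bit marks that
        // MSR as filtered. For example, filtering reads of MSR 0x10 clears bit 0 of byte 2 of
        // `rd_msr_bitmap`.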
        for index in rd_msrs {
            // Currently we only consider MSRs with indices below
            // KVM_MSR_FILTER_RANGE_MAX_BITS.
            if index >= (KVM_MSR_FILTER_RANGE_MAX_BITS as u32) {
                continue;
            }
            rd_nmsrs += 1;
            rd_msr_bitmap[(index / 8) as usize] &= !(1 << (index & 0x7));
        }
        for index in wr_msrs {
            // Currently we only consider MSRs with indices below
            // KVM_MSR_FILTER_RANGE_MAX_BITS.
            if index >= (KVM_MSR_FILTER_RANGE_MAX_BITS as u32) {
                continue;
            }
            wr_nmsrs += 1;
            wr_msr_bitmap[(index / 8) as usize] &= !(1 << (index & 0x7));
        }

        let mut msr_filter = kvm_msr_filter {
            flags: KVM_MSR_FILTER_DEFAULT_ALLOW,
            ..Default::default()
        };

        let mut count = 0;
        if rd_nmsrs > 0 {
            msr_filter.ranges[count].flags = KVM_MSR_FILTER_READ;
            msr_filter.ranges[count].nmsrs = KVM_MSR_FILTER_RANGE_MAX_BITS as u32;
            msr_filter.ranges[count].base = 0x0;
            msr_filter.ranges[count].bitmap = rd_msr_bitmap.as_mut_ptr();
            count += 1;
        }
        if wr_nmsrs > 0 {
            msr_filter.ranges[count].flags = KVM_MSR_FILTER_WRITE;
            msr_filter.ranges[count].nmsrs = KVM_MSR_FILTER_RANGE_MAX_BITS as u32;
            msr_filter.ranges[count].base = 0x0;
            msr_filter.ranges[count].bitmap = wr_msr_bitmap.as_mut_ptr();
            count += 1;
        }

        let mut ret = 0;
        if count > 0 {
            // Safe because we know that our file is a VM fd, we know that the
            // kernel will only read the correct amount of memory from our pointer,
            // and we verify the return result.
            ret = unsafe { ioctl_with_ref(self, KVM_X86_SET_MSR_FILTER(), &msr_filter) };
        }

        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }

    /// Enables support for split-irqchip.
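    ///
    /// `ioapic_pins` is the number of pins KVM routes to the userspace IO-APIC; an illustrative
    /// call (a sketch, not taken from this file) would be
    /// `vm.enable_split_irqchip(NUM_IOAPIC_PINS)?`.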
    pub fn enable_split_irqchip(&self, ioapic_pins: usize) -> Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = ioapic_pins as u64;
        // Safe because we allocated the struct and we know the kernel will read
        // exactly the size of the struct.
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), &cap) };
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
}

impl VmX86_64 for KvmVm {
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
        &self.kvm
    }

    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
        // create_vcpu is declared separately in VmAArch64 and VmX86_64, so it can return
        // VcpuAArch64 or VcpuX86_64. But both use the same implementation in
        // KvmVm::create_kvm_vcpu.
        Ok(Box::new(KvmVm::create_kvm_vcpu(self, id)?))
    }

    /// Sets the address of the three-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_TSS_ADDR ioctl.
    fn set_tss_addr(&self, addr: GuestAddress) -> Result<()> {
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl_with_val(self, KVM_SET_TSS_ADDR(), addr.offset() as u64) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
    fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret =
            unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &(addr.offset() as u64)) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
}

impl KvmVcpu {
    /// Arch-specific implementation of `Vcpu::pvclock_ctrl`.
    pub fn pvclock_ctrl_arch(&self) -> Result<()> {
        let ret = unsafe {
            // The ioctl is safe because it does not read or write memory in this process.
            ioctl(self, KVM_KVMCLOCK_CTRL())
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Handles a `KVM_EXIT_SYSTEM_EVENT` with event type `KVM_SYSTEM_EVENT_RESET` with the given
    /// event flags and returns the appropriate `VcpuExit` value for the run loop to handle.
    pub fn system_event_reset(&self, _event_flags: u64) -> Result<VcpuExit> {
        Ok(VcpuExit::SystemEventReset)
    }
}

impl VcpuX86_64 for KvmVcpu {
    #[allow(clippy::cast_ptr_alignment)]
    fn set_interrupt_window_requested(&self, requested: bool) {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.request_interrupt_window = requested.into();
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn ready_for_interrupt(&self) -> bool {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.ready_for_interrupt_injection != 0 && run.if_flag != 0
    }

    /// Use the KVM_INTERRUPT ioctl to inject the specified interrupt vector.
    ///
    /// While this ioctl exists on PPC and MIPS as well as x86, the semantics are different and
    /// ChromeOS doesn't support PPC or MIPS.
    fn interrupt(&self, irq: u32) -> Result<()> {
        let interrupt = kvm_interrupt { irq };
        // Safe because we allocated the struct and we know the kernel will read
        // exactly the size of the struct.
        let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn inject_nmi(&self) -> Result<()> {
        // Safe because we know that our file is a VCPU fd.
        let ret = unsafe { ioctl(self, KVM_NMI()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_regs(&self) -> Result<Regs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs: kvm_regs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
        if ret == 0 {
            Ok(Regs::from(&regs))
        } else {
            errno_result()
        }
    }

    fn set_regs(&self, regs: &Regs) -> Result<()> {
        let regs = kvm_regs::from(regs);
        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), &regs) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_sregs(&self) -> Result<Sregs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs: kvm_sregs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
        if ret == 0 {
            Ok(Sregs::from(&regs))
        } else {
            errno_result()
        }
    }

    fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
        // Get the current `kvm_sregs` so we can use its `apic_base` and `interrupt_bitmap`, which
        // are not present in `Sregs`.
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut kvm_sregs: kvm_sregs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut kvm_sregs) };
        if ret != 0 {
            return errno_result();
        }

        kvm_sregs.cs = kvm_segment::from(&sregs.cs);
        kvm_sregs.ds = kvm_segment::from(&sregs.ds);
        kvm_sregs.es = kvm_segment::from(&sregs.es);
        kvm_sregs.fs = kvm_segment::from(&sregs.fs);
        kvm_sregs.gs = kvm_segment::from(&sregs.gs);
        kvm_sregs.ss = kvm_segment::from(&sregs.ss);
        kvm_sregs.tr = kvm_segment::from(&sregs.tr);
        kvm_sregs.ldt = kvm_segment::from(&sregs.ldt);
        kvm_sregs.gdt = kvm_dtable::from(&sregs.gdt);
        kvm_sregs.idt = kvm_dtable::from(&sregs.idt);
        kvm_sregs.cr0 = sregs.cr0;
        kvm_sregs.cr2 = sregs.cr2;
        kvm_sregs.cr3 = sregs.cr3;
        kvm_sregs.cr4 = sregs.cr4;
        kvm_sregs.cr8 = sregs.cr8;
        kvm_sregs.efer = sregs.efer;

        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), &kvm_sregs) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_fpu(&self) -> Result<Fpu> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut fpu: kvm_fpu = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) };
        if ret == 0 {
            Ok(Fpu::from(&fpu))
        } else {
            errno_result()
        }
    }

    fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
        let fpu = kvm_fpu::from(fpu);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_fpu struct.
            ioctl_with_ref(self, KVM_SET_FPU(), &fpu)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Gets the VCPU's xsave state, using `KVM_GET_XSAVE2` when the VM reports (via
    /// `KVM_CAP_XSAVE2`) a state larger than the legacy 4096-byte area.
    fn get_xsave(&self) -> Result<Xsave> {
        // Get the size of the xsave state in bytes: checking KVM_CAP_XSAVE2 returns the
        // required buffer size.
        // Safe because we know that our file is a VM fd and we verify the return result;
        // KVM_CHECK_EXTENSION takes a plain value, not a pointer.
        let size =
            unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION(), KVM_CAP_XSAVE2 as u64) };
        if size < 0 {
            return errno_result();
        }
        // size / sizeof(u32) = length of the vec.
        let mut xsave: Vec<u32> = vec![0u32; size as usize / size_of::<u32>()];
        let ioctl_nr = if size > KVM_XSAVE_MAX_SIZE {
            KVM_GET_XSAVE2()
        } else {
            KVM_GET_XSAVE()
        };
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ptr(self, ioctl_nr, xsave.as_mut_ptr()) };
        if ret == 0 {
            Ok(Xsave::from(xsave))
        } else {
            errno_result()
        }
    }

    fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
        // Get the size from KVM_CAP_XSAVE2: it returns at least 4096 if XSAVE2 is not supported
        // or no extra extensions are enabled, and a larger value otherwise.
        // Safe because we know that our file is a VM fd and we verify the return result;
        // KVM_CHECK_EXTENSION takes a plain value, not a pointer.
        let size =
            unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION(), KVM_CAP_XSAVE2 as u64) };
        if size < 0 {
            return errno_result();
        }
        // Ensure xsave is the same size as used in get_xsave. If the sizes don't match, a
        // different set of CPU extensions is enabled, so return an error.
        if xsave.0.len() != size as usize / size_of::<u32>() {
            return Err(Error::new(EIO));
        }

        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        // Because of the len check above, and because the layout of `struct kvm_xsave` is
        // compatible with a slice of `u32`, we can pass the pointer to `xsave` directly.
        let ret = unsafe { ioctl_with_ptr(self, KVM_SET_XSAVE(), xsave.0.as_ptr()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_vcpu_events(&self) -> Result<VcpuEvents> {
        let mut vcpu_evts: kvm_vcpu_events = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_evts) };
        if ret == 0 {
            Ok(VcpuEvents::from(&vcpu_evts))
        } else {
            errno_result()
        }
    }

    fn set_vcpu_events(&self, vcpu_evts: &VcpuEvents) -> Result<()> {
        let vcpu_events = kvm_vcpu_events::from(vcpu_evts);
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), &vcpu_events) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_debugregs(&self) -> Result<DebugRegs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs: kvm_debugregs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut regs) };
        if ret == 0 {
            Ok(DebugRegs::from(&regs))
        } else {
            errno_result()
        }
    }

    fn set_debugregs(&self, dregs: &DebugRegs) -> Result<()> {
        let dregs = kvm_debugregs::from(dregs);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_debugregs struct.
            ioctl_with_ref(self, KVM_SET_DEBUGREGS(), &dregs)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_xcrs(&self) -> Result<Vec<Register>> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs: kvm_xcrs = Default::default();
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut regs) };
        if ret == 0 {
            Ok(from_kvm_xcrs(&regs))
        } else {
            errno_result()
        }
    }

    fn set_xcrs(&self, xcrs: &[Register]) -> Result<()> {
        let xcrs = to_kvm_xcrs(xcrs);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
            ioctl_with_ref(self, KVM_SET_XCRS(), &xcrs)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_msrs(&self, vec: &mut Vec<Register>) -> Result<()> {
        let msrs = to_kvm_msrs(vec);
        let ret = unsafe {
            // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_GET_MSRS(), &msrs[0])
        };
        // KVM_GET_MSRS actually returns the number of msr entries written.
        if ret < 0 {
            return errno_result();
        }
        // Safe because we trust the kernel to return the correct array length on success.
        let entries = unsafe {
            let count = ret as usize;
            assert!(count <= vec.len());
            msrs[0].entries.as_slice(count)
        };
        vec.truncate(0);
        vec.extend(entries.iter().map(|e| Register {
            id: e.index,
            value: e.data,
        }));
        Ok(())
    }

    fn get_all_msrs(&self) -> Result<Vec<Register>> {
        let mut msrs = self
            .kvm
            .get_msr_index_list()?
            .into_iter()
            .map(|i| Register { id: i, value: 0 })
            .collect();
        self.get_msrs(&mut msrs)?;
        Ok(msrs)
    }

    fn set_msrs(&self, vec: &[Register]) -> Result<()> {
        let msrs = to_kvm_msrs(vec);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_SET_MSRS(), &msrs[0])
        };
        // KVM_SET_MSRS actually returns the number of msr entries written.
        if ret >= 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn set_cpuid(&self, cpuid: &CpuId) -> Result<()> {
        let cpuid = KvmCpuId::from(cpuid);
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
            ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_ptr())
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        const KVM_MAX_ENTRIES: usize = 256;
        get_cpuid_with_initial_capacity(self, KVM_GET_SUPPORTED_HV_CPUID(), KVM_MAX_ENTRIES)
    }

    fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()> {
        use kvm_sys::*;
        let mut dbg: kvm_guest_debug = Default::default();

        if addrs.len() > 4 {
            error!(
                "Support 4 breakpoints at most but {} addresses are passed",
                addrs.len()
            );
            return Err(base::Error::new(libc::EINVAL));
        }

        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        if enable_singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set bits 9 and 10:
        // bit 9: GE (global exact breakpoint enable) flag.
        // bit 10: always 1.
        dbg.arch.debugreg[7] = 0x0600;

        for (i, addr) in addrs.iter().enumerate() {
            dbg.arch.debugreg[i] = addr.0;
            // Set the global breakpoint enable flag for this breakpoint.
            dbg.arch.debugreg[7] |= 2 << (i * 2);
        }
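        // For example, with two breakpoints installed, debugreg[7] is now
        // 0x0600 | 0b0010 | 0b1000 = 0x060a (G0 and G1 set).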

        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_guest_debug struct.
            ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), &dbg)
        };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// KVM does not support the VcpuExit::Cpuid exit type.
    fn handle_cpuid(&mut self, _entry: &CpuIdEntry) -> Result<()> {
        Err(Error::new(ENXIO))
    }

    fn get_tsc_offset(&self) -> Result<u64> {
        // Use the default MSR-based implementation.
        get_tsc_offset_from_msr(self)
    }

    fn set_tsc_offset(&self, offset: u64) -> Result<()> {
        // Use the default MSR-based implementation.
        set_tsc_offset_via_msr(self, offset)
    }
}

impl KvmVcpu {
    /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt
    /// Controller".
    ///
    /// See the documentation for KVM_GET_LAPIC.
    pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
        let mut klapic: kvm_lapic_state = Default::default();

        let ret = unsafe {
            // The ioctl is unsafe unless you trust the kernel not to write past the end of the
            // local_apic struct.
            ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(klapic)
    }

    /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt
    /// Controller".
    ///
    /// See the documentation for KVM_SET_LAPIC.
    pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
        let ret = unsafe {
            // The ioctl is safe because the kernel will only read from the klapic struct.
            ioctl_with_ref(self, KVM_SET_LAPIC(), klapic)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}

impl<'a> From<&'a KvmCpuId> for CpuId {
    fn from(kvm_cpuid: &'a KvmCpuId) -> CpuId {
        let kvm_entries = kvm_cpuid.entries_slice();
        let mut cpu_id_entries = Vec::with_capacity(kvm_entries.len());

        for entry in kvm_entries {
            let cpu_id_entry = CpuIdEntry {
                function: entry.function,
                index: entry.index,
                flags: entry.flags,
                cpuid: CpuidResult {
                    eax: entry.eax,
                    ebx: entry.ebx,
                    ecx: entry.ecx,
                    edx: entry.edx,
                },
            };
            cpu_id_entries.push(cpu_id_entry)
        }
        CpuId { cpu_id_entries }
    }
}

impl From<&CpuId> for KvmCpuId {
    fn from(cpuid: &CpuId) -> KvmCpuId {
        let mut kvm = KvmCpuId::new(cpuid.cpu_id_entries.len());
        let entries = kvm.mut_entries_slice();
        for (i, &e) in cpuid.cpu_id_entries.iter().enumerate() {
            entries[i] = kvm_cpuid_entry2 {
                function: e.function,
                index: e.index,
                flags: e.flags,
                eax: e.cpuid.eax,
                ebx: e.cpuid.ebx,
                ecx: e.cpuid.ecx,
                edx: e.cpuid.edx,
                ..Default::default()
            };
        }
        kvm
    }
}

impl From<ClockState> for kvm_clock_data {
    fn from(state: ClockState) -> Self {
        kvm_clock_data {
            clock: state.clock,
            flags: state.flags,
            ..Default::default()
        }
    }
}

impl From<kvm_clock_data> for ClockState {
    fn from(clock_data: kvm_clock_data) -> Self {
        ClockState {
            clock: clock_data.clock,
            flags: clock_data.flags,
        }
    }
}

impl From<&kvm_pic_state> for PicState {
    fn from(item: &kvm_pic_state) -> Self {
        PicState {
            last_irr: item.last_irr,
            irr: item.irr,
            imr: item.imr,
            isr: item.isr,
            priority_add: item.priority_add,
            irq_base: item.irq_base,
            read_reg_select: item.read_reg_select != 0,
            poll: item.poll != 0,
            special_mask: item.special_mask != 0,
            init_state: item.init_state.into(),
            auto_eoi: item.auto_eoi != 0,
            rotate_on_auto_eoi: item.rotate_on_auto_eoi != 0,
            special_fully_nested_mode: item.special_fully_nested_mode != 0,
            use_4_byte_icw: item.init4 != 0,
            elcr: item.elcr,
            elcr_mask: item.elcr_mask,
        }
    }
}

impl From<&PicState> for kvm_pic_state {
    fn from(item: &PicState) -> Self {
        kvm_pic_state {
            last_irr: item.last_irr,
            irr: item.irr,
            imr: item.imr,
            isr: item.isr,
            priority_add: item.priority_add,
            irq_base: item.irq_base,
            read_reg_select: item.read_reg_select as u8,
            poll: item.poll as u8,
            special_mask: item.special_mask as u8,
            init_state: item.init_state as u8,
            auto_eoi: item.auto_eoi as u8,
            rotate_on_auto_eoi: item.rotate_on_auto_eoi as u8,
            special_fully_nested_mode: item.special_fully_nested_mode as u8,
            init4: item.use_4_byte_icw as u8,
            elcr: item.elcr,
            elcr_mask: item.elcr_mask,
        }
    }
}

impl From<&kvm_ioapic_state> for IoapicState {
    fn from(item: &kvm_ioapic_state) -> Self {
        let mut state = IoapicState {
            base_address: item.base_address,
            ioregsel: item.ioregsel as u8,
            ioapicid: item.id,
            current_interrupt_level_bitmap: item.irr,
            redirect_table: [IoapicRedirectionTableEntry::default(); 120],
        };
        for (in_state, out_state) in item.redirtbl.iter().zip(state.redirect_table.iter_mut()) {
            *out_state = in_state.into();
        }
        state
    }
}

impl From<&IoapicRedirectionTableEntry> for kvm_ioapic_state__bindgen_ty_1 {
    fn from(item: &IoapicRedirectionTableEntry) -> Self {
        kvm_ioapic_state__bindgen_ty_1 {
            // IoapicRedirectionTableEntry layout matches the exact bit layout of a hardware
            // ioapic redirection table entry, so we can simply do a 64-bit copy.
            bits: item.get(0, 64),
        }
    }
}

impl From<&kvm_ioapic_state__bindgen_ty_1> for IoapicRedirectionTableEntry {
    fn from(item: &kvm_ioapic_state__bindgen_ty_1) -> Self {
        let mut entry = IoapicRedirectionTableEntry::default();
        // Safe because the 64-bit layout of the IoapicRedirectionTableEntry matches the kvm_sys
        // table entry layout.
        entry.set(0, 64, unsafe { item.bits as u64 });
        entry
    }
}

impl From<&IoapicState> for kvm_ioapic_state {
    fn from(item: &IoapicState) -> Self {
        let mut state = kvm_ioapic_state {
            base_address: item.base_address,
            ioregsel: item.ioregsel as u32,
            id: item.ioapicid,
            irr: item.current_interrupt_level_bitmap,
            ..Default::default()
        };
        for (in_state, out_state) in item.redirect_table.iter().zip(state.redirtbl.iter_mut()) {
            *out_state = in_state.into();
        }
        state
    }
}

impl From<&LapicState> for kvm_lapic_state {
    fn from(item: &LapicState) -> Self {
        let mut state = kvm_lapic_state::default();
        // There are 64 lapic registers.
        for (reg, value) in item.regs.iter().enumerate() {
            // Each lapic register is 16 bytes, but only the first 4 are used.
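            // For example, register index 2 occupies bytes 32..48, with its 4-byte value
            // stored little-endian in bytes 32..36.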
            let reg_offset = 16 * reg;
            let regs_slice = &mut state.regs[reg_offset..reg_offset + 4];

            // to_le_bytes() produces an array of u8, not i8 (c_char), so we can't directly use
            // copy_from_slice().
            for (i, v) in value.to_le_bytes().iter().enumerate() {
                regs_slice[i] = *v as i8;
            }
        }
        state
    }
}

impl From<&kvm_lapic_state> for LapicState {
    fn from(item: &kvm_lapic_state) -> Self {
        let mut state = LapicState { regs: [0; 64] };
        // There are 64 lapic registers.
        for reg in 0..64 {
            // Each lapic register is 16 bytes, but only the first 4 are used.
            let reg_offset = 16 * reg;

            // from_le_bytes() only works on arrays of u8, not i8 (c_char).
            let reg_slice = &item.regs[reg_offset..reg_offset + 4];
            let mut bytes = [0u8; 4];
            for i in 0..4 {
                bytes[i] = reg_slice[i] as u8;
            }
            state.regs[reg] = u32::from_le_bytes(bytes);
        }
        state
    }
}

impl From<&PitState> for kvm_pit_state2 {
    fn from(item: &PitState) -> Self {
        kvm_pit_state2 {
            channels: [
                kvm_pit_channel_state::from(&item.channels[0]),
                kvm_pit_channel_state::from(&item.channels[1]),
                kvm_pit_channel_state::from(&item.channels[2]),
            ],
            flags: item.flags,
            ..Default::default()
        }
    }
}

impl From<&kvm_pit_state2> for PitState {
    fn from(item: &kvm_pit_state2) -> Self {
        PitState {
            channels: [
                PitChannelState::from(&item.channels[0]),
                PitChannelState::from(&item.channels[1]),
                PitChannelState::from(&item.channels[2]),
            ],
            flags: item.flags,
        }
    }
}

impl From<&PitChannelState> for kvm_pit_channel_state {
    fn from(item: &PitChannelState) -> Self {
        kvm_pit_channel_state {
            count: item.count,
            latched_count: item.latched_count,
            count_latched: item.count_latched as u8,
            status_latched: item.status_latched as u8,
            status: item.status,
            read_state: item.read_state as u8,
            write_state: item.write_state as u8,
            // kvm's write_latch only stores the low byte of the reload value.
            write_latch: item.reload_value as u8,
            rw_mode: item.rw_mode as u8,
            mode: item.mode,
            bcd: item.bcd as u8,
            gate: item.gate as u8,
            count_load_time: item.count_load_time as i64,
        }
    }
}

impl From<&kvm_pit_channel_state> for PitChannelState {
    fn from(item: &kvm_pit_channel_state) -> Self {
        PitChannelState {
            count: item.count,
            latched_count: item.latched_count,
            count_latched: item.count_latched.into(),
            status_latched: item.status_latched != 0,
            status: item.status,
            read_state: item.read_state.into(),
            write_state: item.write_state.into(),
            // kvm's write_latch only stores the low byte of the reload value.
            reload_value: item.write_latch as u16,
            rw_mode: item.rw_mode.into(),
            mode: item.mode,
            bcd: item.bcd != 0,
            gate: item.gate != 0,
            count_load_time: item.count_load_time as u64,
        }
    }
}

// This function translates an IrqSourceChip to the kvm u32 equivalent. It has a different
// implementation between x86_64 and aarch64 because the irqchip KVM constants are not defined on
// all architectures.
pub(super) fn chip_to_kvm_chip(chip: IrqSourceChip) -> u32 {
    match chip {
        IrqSourceChip::PicPrimary => KVM_IRQCHIP_PIC_MASTER,
        IrqSourceChip::PicSecondary => KVM_IRQCHIP_PIC_SLAVE,
        IrqSourceChip::Ioapic => KVM_IRQCHIP_IOAPIC,
        _ => {
            error!("Invalid IrqSourceChip for X86 {:?}", chip);
            0
        }
    }
}

impl From<&kvm_regs> for Regs {
    fn from(r: &kvm_regs) -> Self {
        Regs {
            rax: r.rax,
            rbx: r.rbx,
            rcx: r.rcx,
            rdx: r.rdx,
            rsi: r.rsi,
            rdi: r.rdi,
            rsp: r.rsp,
            rbp: r.rbp,
            r8: r.r8,
            r9: r.r9,
            r10: r.r10,
            r11: r.r11,
            r12: r.r12,
            r13: r.r13,
            r14: r.r14,
            r15: r.r15,
            rip: r.rip,
            rflags: r.rflags,
        }
    }
}

impl From<&Regs> for kvm_regs {
    fn from(r: &Regs) -> Self {
        kvm_regs {
            rax: r.rax,
            rbx: r.rbx,
            rcx: r.rcx,
            rdx: r.rdx,
            rsi: r.rsi,
            rdi: r.rdi,
            rsp: r.rsp,
            rbp: r.rbp,
            r8: r.r8,
            r9: r.r9,
            r10: r.r10,
            r11: r.r11,
            r12: r.r12,
            r13: r.r13,
            r14: r.r14,
            r15: r.r15,
            rip: r.rip,
            rflags: r.rflags,
        }
    }
}

impl From<&VcpuEvents> for kvm_vcpu_events {
    fn from(ve: &VcpuEvents) -> Self {
        let mut kvm_ve: kvm_vcpu_events = Default::default();

        kvm_ve.exception.injected = ve.exception.injected as u8;
        kvm_ve.exception.nr = ve.exception.nr;
        kvm_ve.exception.has_error_code = ve.exception.has_error_code as u8;
        if let Some(pending) = ve.exception.pending {
            kvm_ve.exception.pending = pending as u8;
            if ve.exception_payload.is_some() {
                kvm_ve.exception_has_payload = true as u8;
            }
            kvm_ve.exception_payload = ve.exception_payload.unwrap_or(0);
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
        }
        kvm_ve.exception.error_code = ve.exception.error_code;

        kvm_ve.interrupt.injected = ve.interrupt.injected as u8;
        kvm_ve.interrupt.nr = ve.interrupt.nr;
        kvm_ve.interrupt.soft = ve.interrupt.soft as u8;
        if let Some(shadow) = ve.interrupt.shadow {
            kvm_ve.interrupt.shadow = shadow;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_SHADOW;
        }

        kvm_ve.nmi.injected = ve.nmi.injected as u8;
        if let Some(pending) = ve.nmi.pending {
            kvm_ve.nmi.pending = pending as u8;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
        }
        kvm_ve.nmi.masked = ve.nmi.masked as u8;

        if let Some(sipi_vector) = ve.sipi_vector {
            kvm_ve.sipi_vector = sipi_vector;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR;
        }

        if let Some(smm) = ve.smi.smm {
            kvm_ve.smi.smm = smm as u8;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_SMM;
        }
        kvm_ve.smi.pending = ve.smi.pending as u8;
        kvm_ve.smi.smm_inside_nmi = ve.smi.smm_inside_nmi as u8;
        kvm_ve.smi.latched_init = ve.smi.latched_init;

        if let Some(pending) = ve.triple_fault.pending {
            kvm_ve.triple_fault.pending = pending as u8;
            kvm_ve.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
        }
        kvm_ve
    }
}

impl From<&kvm_vcpu_events> for VcpuEvents {
    fn from(ve: &kvm_vcpu_events) -> Self {
        let exception = VcpuExceptionState {
            injected: ve.exception.injected != 0,
            nr: ve.exception.nr,
            has_error_code: ve.exception.has_error_code != 0,
            pending: if ve.flags & KVM_VCPUEVENT_VALID_PAYLOAD != 0 {
                Some(ve.exception.pending != 0)
            } else {
                None
            },
            error_code: ve.exception.error_code,
        };

        let interrupt = VcpuInterruptState {
            injected: ve.interrupt.injected != 0,
            nr: ve.interrupt.nr,
            soft: ve.interrupt.soft != 0,
            shadow: if ve.flags & KVM_VCPUEVENT_VALID_SHADOW != 0 {
                Some(ve.interrupt.shadow)
            } else {
                None
            },
        };

        let nmi = VcpuNmiState {
            injected: ve.nmi.injected != 0,
            pending: if ve.flags & KVM_VCPUEVENT_VALID_NMI_PENDING != 0 {
                Some(ve.nmi.pending != 0)
            } else {
                None
            },
            masked: ve.nmi.masked != 0,
        };

        let sipi_vector = if ve.flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR != 0 {
            Some(ve.sipi_vector)
        } else {
            None
        };

        let smi = VcpuSmiState {
            smm: if ve.flags & KVM_VCPUEVENT_VALID_SMM != 0 {
                Some(ve.smi.smm != 0)
            } else {
                None
            },
            pending: ve.smi.pending != 0,
            smm_inside_nmi: ve.smi.smm_inside_nmi != 0,
            latched_init: ve.smi.latched_init,
        };

        let triple_fault = VcpuTripleFaultState {
            pending: if ve.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT != 0 {
                Some(ve.triple_fault.pending != 0)
            } else {
                None
            },
        };

        let exception_payload = if ve.flags & KVM_VCPUEVENT_VALID_PAYLOAD != 0 {
            Some(ve.exception_payload)
        } else {
            None
        };

        VcpuEvents {
            exception,
            interrupt,
            nmi,
            sipi_vector,
            smi,
            triple_fault,
            exception_payload,
        }
    }
}

impl From<&kvm_segment> for Segment {
    fn from(s: &kvm_segment) -> Self {
        Segment {
            base: s.base,
            limit: s.limit,
            selector: s.selector,
            type_: s.type_,
            present: s.present,
            dpl: s.dpl,
            db: s.db,
            s: s.s,
            l: s.l,
            g: s.g,
            avl: s.avl,
        }
    }
}

impl From<&Segment> for kvm_segment {
    fn from(s: &Segment) -> Self {
        kvm_segment {
            base: s.base,
            limit: s.limit,
            selector: s.selector,
            type_: s.type_,
            present: s.present,
            dpl: s.dpl,
            db: s.db,
            s: s.s,
            l: s.l,
            g: s.g,
            avl: s.avl,
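            // KVM tracks an `unusable` flag that the hypervisor-agnostic `Segment` doesn't
            // carry; derive it as the inverse of `present`.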
            unusable: match s.present {
                0 => 1,
                _ => 0,
            },
            ..Default::default()
        }
    }
}

impl From<&kvm_dtable> for DescriptorTable {
    fn from(dt: &kvm_dtable) -> Self {
        DescriptorTable {
            base: dt.base,
            limit: dt.limit,
        }
    }
}

impl From<&DescriptorTable> for kvm_dtable {
    fn from(dt: &DescriptorTable) -> Self {
        kvm_dtable {
            base: dt.base,
            limit: dt.limit,
            ..Default::default()
        }
    }
}

impl From<&kvm_sregs> for Sregs {
    fn from(r: &kvm_sregs) -> Self {
        Sregs {
            cs: Segment::from(&r.cs),
            ds: Segment::from(&r.ds),
            es: Segment::from(&r.es),
            fs: Segment::from(&r.fs),
            gs: Segment::from(&r.gs),
            ss: Segment::from(&r.ss),
            tr: Segment::from(&r.tr),
            ldt: Segment::from(&r.ldt),
            gdt: DescriptorTable::from(&r.gdt),
            idt: DescriptorTable::from(&r.idt),
            cr0: r.cr0,
            cr2: r.cr2,
            cr3: r.cr3,
            cr4: r.cr4,
            cr8: r.cr8,
            efer: r.efer,
        }
    }
}

impl From<&kvm_fpu> for Fpu {
    fn from(r: &kvm_fpu) -> Self {
        Fpu {
            fpr: r.fpr,
            fcw: r.fcw,
            fsw: r.fsw,
            ftwx: r.ftwx,
            last_opcode: r.last_opcode,
            last_ip: r.last_ip,
            last_dp: r.last_dp,
            xmm: r.xmm,
            mxcsr: r.mxcsr,
        }
    }
}

impl From<&Fpu> for kvm_fpu {
    fn from(r: &Fpu) -> Self {
        kvm_fpu {
            fpr: r.fpr,
            fcw: r.fcw,
            fsw: r.fsw,
            ftwx: r.ftwx,
            last_opcode: r.last_opcode,
            last_ip: r.last_ip,
            last_dp: r.last_dp,
            xmm: r.xmm,
            mxcsr: r.mxcsr,
            ..Default::default()
        }
    }
}

impl Xsave {
    fn from(r: Vec<u32>) -> Self {
        Xsave(r)
    }
}

impl From<&kvm_debugregs> for DebugRegs {
    fn from(r: &kvm_debugregs) -> Self {
        DebugRegs {
            db: r.db,
            dr6: r.dr6,
            dr7: r.dr7,
        }
    }
}

impl From<&DebugRegs> for kvm_debugregs {
    fn from(r: &DebugRegs) -> Self {
        kvm_debugregs {
            db: r.db,
            dr6: r.dr6,
            dr7: r.dr7,
            ..Default::default()
        }
    }
}

fn from_kvm_xcrs(r: &kvm_xcrs) -> Vec<Register> {
    r.xcrs
        .iter()
        .take(r.nr_xcrs as usize)
        .map(|x| Register {
            id: x.xcr,
            value: x.value,
        })
        .collect()
}

fn to_kvm_xcrs(r: &[Register]) -> kvm_xcrs {
    let mut kvm = kvm_xcrs {
        nr_xcrs: r.len() as u32,
        ..Default::default()
    };
    for (i, &xcr) in r.iter().enumerate() {
        kvm.xcrs[i].xcr = xcr.id as u32;
        kvm.xcrs[i].value = xcr.value;
    }
    kvm
}

fn to_kvm_msrs(vec: &[Register]) -> Vec<kvm_msrs> {
    let vec: Vec<kvm_msr_entry> = vec
        .iter()
        .map(|e| kvm_msr_entry {
            index: e.id as u32,
            data: e.value,
            ..Default::default()
        })
        .collect();

    let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(vec.len());
    unsafe {
        // Mapping the unsized array to a slice is unsafe because the length isn't known.
        // Providing the length used to create the struct guarantees the entire slice is valid.
        msrs[0]
            .entries
            .as_mut_slice(vec.len())
            .copy_from_slice(&vec);
    }
    msrs[0].nmsrs = vec.len() as u32;
    msrs
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn vcpu_event_to_from() {
        // All data is random.
        let mut kvm_ve: kvm_vcpu_events = Default::default();
        kvm_ve.exception.injected = 1;
        kvm_ve.exception.nr = 65;
        kvm_ve.exception.has_error_code = 1;
        kvm_ve.exception.error_code = 110;
        kvm_ve.exception.pending = 1;

        kvm_ve.interrupt.injected = 1;
        kvm_ve.interrupt.nr = 100;
        kvm_ve.interrupt.soft = 1;
        kvm_ve.interrupt.shadow = 114;

        kvm_ve.nmi.injected = 1;
        kvm_ve.nmi.pending = 1;
        kvm_ve.nmi.masked = 0;

        kvm_ve.sipi_vector = 105;

        kvm_ve.smi.smm = 1;
        kvm_ve.smi.pending = 1;
        kvm_ve.smi.smm_inside_nmi = 1;
        kvm_ve.smi.latched_init = 100;

        kvm_ve.triple_fault.pending = 0;

        kvm_ve.exception_payload = 33;
        kvm_ve.exception_has_payload = 1;

        kvm_ve.flags = 0
            | KVM_VCPUEVENT_VALID_PAYLOAD
            | KVM_VCPUEVENT_VALID_SMM
            | KVM_VCPUEVENT_VALID_NMI_PENDING
            | KVM_VCPUEVENT_VALID_SIPI_VECTOR
            | KVM_VCPUEVENT_VALID_SHADOW;

        let ve: VcpuEvents = VcpuEvents::from(&kvm_ve);
        assert_eq!(ve.exception.injected, true);
        assert_eq!(ve.exception.nr, 65);
        assert_eq!(ve.exception.has_error_code, true);
        assert_eq!(ve.exception.error_code, 110);
        assert_eq!(ve.exception.pending.unwrap(), true);

        assert_eq!(ve.interrupt.injected, true);
        assert_eq!(ve.interrupt.nr, 100);
        assert_eq!(ve.interrupt.soft, true);
        assert_eq!(ve.interrupt.shadow.unwrap(), 114);

        assert_eq!(ve.nmi.injected, true);
        assert_eq!(ve.nmi.pending.unwrap(), true);
        assert_eq!(ve.nmi.masked, false);

        assert_eq!(ve.sipi_vector.unwrap(), 105);

        assert_eq!(ve.smi.smm.unwrap(), true);
        assert_eq!(ve.smi.pending, true);
        assert_eq!(ve.smi.smm_inside_nmi, true);
        assert_eq!(ve.smi.latched_init, 100);

        assert_eq!(ve.triple_fault.pending, None);

        assert_eq!(ve.exception_payload.unwrap(), 33);

        let kvm_ve_restored: kvm_vcpu_events = kvm_vcpu_events::from(&ve);
        assert_eq!(kvm_ve_restored.exception.injected, 1);
        assert_eq!(kvm_ve_restored.exception.nr, 65);
        assert_eq!(kvm_ve_restored.exception.has_error_code, 1);
        assert_eq!(kvm_ve_restored.exception.error_code, 110);
        assert_eq!(kvm_ve_restored.exception.pending, 1);

        assert_eq!(kvm_ve_restored.interrupt.injected, 1);
        assert_eq!(kvm_ve_restored.interrupt.nr, 100);
        assert_eq!(kvm_ve_restored.interrupt.soft, 1);
        assert_eq!(kvm_ve_restored.interrupt.shadow, 114);

        assert_eq!(kvm_ve_restored.nmi.injected, 1);
        assert_eq!(kvm_ve_restored.nmi.pending, 1);
        assert_eq!(kvm_ve_restored.nmi.masked, 0);

        assert_eq!(kvm_ve_restored.sipi_vector, 105);

        assert_eq!(kvm_ve_restored.smi.smm, 1);
        assert_eq!(kvm_ve_restored.smi.pending, 1);
        assert_eq!(kvm_ve_restored.smi.smm_inside_nmi, 1);
        assert_eq!(kvm_ve_restored.smi.latched_init, 100);

        assert_eq!(kvm_ve_restored.triple_fault.pending, 0);

        assert_eq!(kvm_ve_restored.exception_payload, 33);
        assert_eq!(kvm_ve_restored.exception_has_payload, 1);
    }
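
    #[test]
    fn lapic_state_round_trip() {
        // An illustrative round-trip sketch (added for documentation, not taken from the
        // original file) of the 16-byte-stride register packing used by the LapicState
        // conversions above: values written into a LapicState should survive conversion to
        // kvm_lapic_state and back.
        let mut state = LapicState { regs: [0; 64] };
        state.regs[0] = 0x1234_5678;
        state.regs[2] = 0xdead_beef;
        state.regs[63] = 0x0f0f_0f0f;

        let kvm_state = kvm_lapic_state::from(&state);
        // Register 2's value occupies bytes 32..36, little-endian, so its low byte is at
        // offset 32.
        assert_eq!(kvm_state.regs[32] as u8, 0xef);

        let restored = LapicState::from(&kvm_state);
        assert_eq!(restored.regs, state.regs);
    }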
}