• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::alloc::Layout;
6 use std::fmt::{self, Display};
7 use std::{mem, result};
8 
9 use assertions::const_assert;
10 use kvm;
11 use kvm_sys::kvm_fpu;
12 use kvm_sys::kvm_msr_entry;
13 use kvm_sys::kvm_msrs;
14 use kvm_sys::kvm_regs;
15 use kvm_sys::kvm_sregs;
16 use sys_util::{self, GuestAddress, GuestMemory, LayoutAllocation};
17 
18 use crate::gdt;
19 
20 #[derive(Debug)]
21 pub enum Error {
22     /// Setting up msrs failed.
23     MsrIoctlFailed(sys_util::Error),
24     /// Failed to configure the FPU.
25     FpuIoctlFailed(sys_util::Error),
26     /// Failed to get sregs for this cpu.
27     GetSRegsIoctlFailed(sys_util::Error),
28     /// Failed to set base registers for this cpu.
29     SettingRegistersIoctl(sys_util::Error),
30     /// Failed to set sregs for this cpu.
31     SetSRegsIoctlFailed(sys_util::Error),
32     /// Writing the GDT to RAM failed.
33     WriteGDTFailure,
34     /// Writing the IDT to RAM failed.
35     WriteIDTFailure,
36     /// Writing PML4 to RAM failed.
37     WritePML4Address,
38     /// Writing PDPTE to RAM failed.
39     WritePDPTEAddress,
40     /// Writing PDE to RAM failed.
41     WritePDEAddress,
42 }
43 pub type Result<T> = result::Result<T, Error>;
44 
45 impl std::error::Error for Error {}
46 
47 impl Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result48     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49         use self::Error::*;
50 
51         match self {
52             MsrIoctlFailed(e) => write!(f, "setting up msrs failed: {}", e),
53             FpuIoctlFailed(e) => write!(f, "failed to configure the FPU: {}", e),
54             GetSRegsIoctlFailed(e) => write!(f, "failed to get sregs for this cpu: {}", e),
55             SettingRegistersIoctl(e) => {
56                 write!(f, "failed to set base registers for this cpu: {}", e)
57             }
58             SetSRegsIoctlFailed(e) => write!(f, "failed to set sregs for this cpu: {}", e),
59             WriteGDTFailure => write!(f, "writing the GDT to RAM failed"),
60             WriteIDTFailure => write!(f, "writing the IDT to RAM failed"),
61             WritePML4Address => write!(f, "writing PML4 to RAM failed"),
62             WritePDPTEAddress => write!(f, "writing PDPTE to RAM failed"),
63             WritePDEAddress => write!(f, "writing PDE to RAM failed"),
64         }
65     }
66 }
67 
create_msr_entries() -> Vec<kvm_msr_entry>68 fn create_msr_entries() -> Vec<kvm_msr_entry> {
69     let mut entries = Vec::<kvm_msr_entry>::new();
70 
71     entries.push(kvm_msr_entry {
72         index: crate::msr_index::MSR_IA32_SYSENTER_CS,
73         data: 0x0,
74         ..Default::default()
75     });
76     entries.push(kvm_msr_entry {
77         index: crate::msr_index::MSR_IA32_SYSENTER_ESP,
78         data: 0x0,
79         ..Default::default()
80     });
81     entries.push(kvm_msr_entry {
82         index: crate::msr_index::MSR_IA32_SYSENTER_EIP,
83         data: 0x0,
84         ..Default::default()
85     });
86     // x86_64 specific msrs, we only run on x86_64 not x86
87     entries.push(kvm_msr_entry {
88         index: crate::msr_index::MSR_STAR,
89         data: 0x0,
90         ..Default::default()
91     });
92     entries.push(kvm_msr_entry {
93         index: crate::msr_index::MSR_CSTAR,
94         data: 0x0,
95         ..Default::default()
96     });
97     entries.push(kvm_msr_entry {
98         index: crate::msr_index::MSR_KERNEL_GS_BASE,
99         data: 0x0,
100         ..Default::default()
101     });
102     entries.push(kvm_msr_entry {
103         index: crate::msr_index::MSR_SYSCALL_MASK,
104         data: 0x0,
105         ..Default::default()
106     });
107     entries.push(kvm_msr_entry {
108         index: crate::msr_index::MSR_LSTAR,
109         data: 0x0,
110         ..Default::default()
111     });
112     // end of x86_64 specific code
113     entries.push(kvm_msr_entry {
114         index: crate::msr_index::MSR_IA32_TSC,
115         data: 0x0,
116         ..Default::default()
117     });
118     entries.push(kvm_msr_entry {
119         index: crate::msr_index::MSR_IA32_MISC_ENABLE,
120         data: crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
121         ..Default::default()
122     });
123 
124     entries
125 }
126 
127 /// Configure Model specific registers for x86
128 ///
129 /// # Arguments
130 ///
131 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
setup_msrs(vcpu: &kvm::Vcpu) -> Result<()>132 pub fn setup_msrs(vcpu: &kvm::Vcpu) -> Result<()> {
133     const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
134     const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
135     const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
136     const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
137     const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);
138 
139     let entry_vec = create_msr_entries();
140     let size = SIZE_OF_MSRS + entry_vec.len() * SIZE_OF_ENTRY;
141     let layout = Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
142     let mut allocation = LayoutAllocation::zeroed(layout);
143 
144     // Safe to obtain an exclusive reference because there are no other
145     // references to the allocation yet and all-zero is a valid bit pattern.
146     let msrs = unsafe { allocation.as_mut::<kvm_msrs>() };
147 
148     unsafe {
149         // Mapping the unsized array to a slice is unsafe becase the length isn't known.  Providing
150         // the length used to create the struct guarantees the entire slice is valid.
151         let entries: &mut [kvm_msr_entry] = msrs.entries.as_mut_slice(entry_vec.len());
152         entries.copy_from_slice(&entry_vec);
153     }
154     msrs.nmsrs = entry_vec.len() as u32;
155 
156     vcpu.set_msrs(msrs).map_err(Error::MsrIoctlFailed)?;
157 
158     Ok(())
159 
160     // msrs allocation is deallocated.
161 }
162 
163 /// Configure FPU registers for x86
164 ///
165 /// # Arguments
166 ///
167 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
setup_fpu(vcpu: &kvm::Vcpu) -> Result<()>168 pub fn setup_fpu(vcpu: &kvm::Vcpu) -> Result<()> {
169     let fpu: kvm_fpu = kvm_fpu {
170         fcw: 0x37f,
171         mxcsr: 0x1f80,
172         ..Default::default()
173     };
174 
175     vcpu.set_fpu(&fpu).map_err(Error::FpuIoctlFailed)?;
176 
177     Ok(())
178 }
179 
180 /// Configure base registers for x86
181 ///
182 /// # Arguments
183 ///
184 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
185 /// * `boot_ip` - Starting instruction pointer.
186 /// * `boot_sp` - Starting stack pointer.
187 /// * `boot_si` - Must point to zero page address per Linux ABI.
setup_regs(vcpu: &kvm::Vcpu, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()>188 pub fn setup_regs(vcpu: &kvm::Vcpu, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()> {
189     let regs: kvm_regs = kvm_regs {
190         rflags: 0x0000000000000002u64,
191         rip: boot_ip,
192         rsp: boot_sp,
193         rbp: boot_sp,
194         rsi: boot_si,
195         ..Default::default()
196     };
197 
198     vcpu.set_regs(&regs).map_err(Error::SettingRegistersIoctl)?;
199 
200     Ok(())
201 }
202 
/// CR0 bit 0: protection enable.
const X86_CR0_PE: u64 = 1 << 0;
/// CR0 bit 31: paging enable.
const X86_CR0_PG: u64 = 1 << 31;
/// CR4 bit 5: physical address extension.
const X86_CR4_PAE: u64 = 1 << 5;

/// EFER bit 8: long mode enable.
const EFER_LME: u64 = 1 << 8;
/// EFER bit 10: long mode active.
const EFER_LMA: u64 = 1 << 10;

/// Guest address at which the boot GDT is written.
const BOOT_GDT_OFFSET: u64 = 0x500;
/// Guest address at which the boot IDT value is written (directly after the
/// `BOOT_GDT_MAX` eight-byte GDT entries).
const BOOT_IDT_OFFSET: u64 = 0x520;

/// Number of entries in the boot GDT.
const BOOT_GDT_MAX: usize = 4;
214 
write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()>215 fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
216     let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
217     for (index, entry) in table.iter().enumerate() {
218         let addr = guest_mem
219             .checked_offset(boot_gdt_addr, (index * mem::size_of::<u64>()) as u64)
220             .ok_or(Error::WriteGDTFailure)?;
221         guest_mem
222             .write_obj_at_addr(*entry, addr)
223             .map_err(|_| Error::WriteGDTFailure)?;
224     }
225     Ok(())
226 }
227 
write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()>228 fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
229     let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
230     guest_mem
231         .write_obj_at_addr(val, boot_idt_addr)
232         .map_err(|_| Error::WriteIDTFailure)
233 }
234 
configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()>235 fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()> {
236     let gdt_table: [u64; BOOT_GDT_MAX as usize] = [
237         gdt::gdt_entry(0, 0, 0),            // NULL
238         gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
239         gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
240         gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
241     ];
242 
243     let code_seg = gdt::kvm_segment_from_gdt(gdt_table[1], 1);
244     let data_seg = gdt::kvm_segment_from_gdt(gdt_table[2], 2);
245     let tss_seg = gdt::kvm_segment_from_gdt(gdt_table[3], 3);
246 
247     // Write segments
248     write_gdt_table(&gdt_table[..], mem)?;
249     sregs.gdt.base = BOOT_GDT_OFFSET as u64;
250     sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;
251 
252     write_idt_value(0, mem)?;
253     sregs.idt.base = BOOT_IDT_OFFSET as u64;
254     sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;
255 
256     sregs.cs = code_seg;
257     sregs.ds = data_seg;
258     sregs.es = data_seg;
259     sregs.fs = data_seg;
260     sregs.gs = data_seg;
261     sregs.ss = data_seg;
262     sregs.tr = tss_seg;
263 
264     /* 64-bit protected mode */
265     sregs.cr0 |= X86_CR0_PE;
266     sregs.efer |= EFER_LME;
267 
268     Ok(())
269 }
270 
setup_page_tables(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()>271 fn setup_page_tables(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()> {
272     // Puts PML4 right after zero page but aligned to 4k.
273     let boot_pml4_addr = GuestAddress(0x9000);
274     let boot_pdpte_addr = GuestAddress(0xa000);
275     let boot_pde_addr = GuestAddress(0xb000);
276 
277     // Entry covering VA [0..512GB)
278     mem.write_obj_at_addr(boot_pdpte_addr.offset() as u64 | 0x03, boot_pml4_addr)
279         .map_err(|_| Error::WritePML4Address)?;
280 
281     // Entry covering VA [0..1GB)
282     mem.write_obj_at_addr(boot_pde_addr.offset() as u64 | 0x03, boot_pdpte_addr)
283         .map_err(|_| Error::WritePDPTEAddress)?;
284 
285     // 512 2MB entries together covering VA [0..1GB). Note we are assuming
286     // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
287     for i in 0..512 {
288         mem.write_obj_at_addr((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8))
289             .map_err(|_| Error::WritePDEAddress)?;
290     }
291     sregs.cr3 = boot_pml4_addr.offset() as u64;
292     sregs.cr4 |= X86_CR4_PAE;
293     sregs.cr0 |= X86_CR0_PG;
294     sregs.efer |= EFER_LMA; // Long mode is active. Must be auto-enabled with CR0_PG.
295     Ok(())
296 }
297 
298 /// Configures the segment registers and system page tables for a given CPU.
299 ///
300 /// # Arguments
301 ///
302 /// * `mem` - The memory that will be passed to the guest.
303 /// * `vcpu_fd` - The FD returned from the KVM_CREATE_VCPU ioctl.
setup_sregs(mem: &GuestMemory, vcpu: &kvm::Vcpu) -> Result<()>304 pub fn setup_sregs(mem: &GuestMemory, vcpu: &kvm::Vcpu) -> Result<()> {
305     let mut sregs: kvm_sregs = vcpu.get_sregs().map_err(Error::GetSRegsIoctlFailed)?;
306 
307     configure_segments_and_sregs(mem, &mut sregs)?;
308     setup_page_tables(mem, &mut sregs)?; // TODO(dgreid) - Can this be done once per system instead?
309 
310     vcpu.set_sregs(&sregs).map_err(Error::SetSRegsIoctlFailed)?;
311 
312     Ok(())
313 }
314 
#[cfg(test)]
mod tests {
    use super::*;
    use sys_util::{GuestAddress, GuestMemory};

    /// Guest memory with one region of size `0x10000` starting at guest address 0,
    /// which covers every address the functions under test write to (the highest
    /// is the PDE table at `0xb000`).
    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap()
    }

    /// Reads back the `u64` stored at guest address `offset`.
    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj_from_addr(read_addr).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        // Tables as written to guest memory.
        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
        assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
        assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 24));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        // Descriptor-table registers must describe exactly those tables:
        // four 8-byte GDT entries and one 8-byte IDT value, limit = size - 1.
        assert_eq!(BOOT_GDT_OFFSET, sregs.gdt.base);
        assert_eq!(0x1f, sregs.gdt.limit);
        assert_eq!(BOOT_IDT_OFFSET, sregs.idt.base);
        assert_eq!(0x7, sregs.idt.limit);

        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xfffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xfffff, sregs.tr.limit);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0);
        // `sregs` started zeroed, so the long-mode-active bit is the only EFER bit set.
        assert_eq!(EFER_LMA, sregs.efer);
    }
}
371