1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::alloc::Layout;
6 use std::fmt::{self, Display};
7 use std::{mem, result};
8
9 use assertions::const_assert;
10 use kvm;
11 use kvm_sys::kvm_fpu;
12 use kvm_sys::kvm_msr_entry;
13 use kvm_sys::kvm_msrs;
14 use kvm_sys::kvm_regs;
15 use kvm_sys::kvm_sregs;
16 use sys_util::{self, GuestAddress, GuestMemory, LayoutAllocation};
17
18 use crate::gdt;
19
/// Errors that can occur while configuring a vCPU's registers or while
/// writing boot structures (GDT/IDT/page tables) into guest memory.
#[derive(Debug)]
pub enum Error {
    /// Setting up msrs failed.
    MsrIoctlFailed(sys_util::Error),
    /// Failed to configure the FPU.
    FpuIoctlFailed(sys_util::Error),
    /// Failed to get sregs for this cpu.
    GetSRegsIoctlFailed(sys_util::Error),
    /// Failed to set base registers for this cpu.
    SettingRegistersIoctl(sys_util::Error),
    /// Failed to set sregs for this cpu.
    SetSRegsIoctlFailed(sys_util::Error),
    /// Writing the GDT to RAM failed.
    WriteGDTFailure,
    /// Writing the IDT to RAM failed.
    WriteIDTFailure,
    /// Writing PML4 to RAM failed.
    WritePML4Address,
    /// Writing PDPTE to RAM failed.
    WritePDPTEAddress,
    /// Writing PDE to RAM failed.
    WritePDEAddress,
}
/// Specialized `Result` type for this module's register-setup operations.
pub type Result<T> = result::Result<T, Error>;
44
// Marker impl: the error message comes from Display; no underlying source is exposed.
impl std::error::Error for Error {}
46
47 impl Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result48 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49 use self::Error::*;
50
51 match self {
52 MsrIoctlFailed(e) => write!(f, "setting up msrs failed: {}", e),
53 FpuIoctlFailed(e) => write!(f, "failed to configure the FPU: {}", e),
54 GetSRegsIoctlFailed(e) => write!(f, "failed to get sregs for this cpu: {}", e),
55 SettingRegistersIoctl(e) => {
56 write!(f, "failed to set base registers for this cpu: {}", e)
57 }
58 SetSRegsIoctlFailed(e) => write!(f, "failed to set sregs for this cpu: {}", e),
59 WriteGDTFailure => write!(f, "writing the GDT to RAM failed"),
60 WriteIDTFailure => write!(f, "writing the IDT to RAM failed"),
61 WritePML4Address => write!(f, "writing PML4 to RAM failed"),
62 WritePDPTEAddress => write!(f, "writing PDPTE to RAM failed"),
63 WritePDEAddress => write!(f, "writing PDE to RAM failed"),
64 }
65 }
66 }
67
create_msr_entries() -> Vec<kvm_msr_entry>68 fn create_msr_entries() -> Vec<kvm_msr_entry> {
69 let mut entries = Vec::<kvm_msr_entry>::new();
70
71 entries.push(kvm_msr_entry {
72 index: crate::msr_index::MSR_IA32_SYSENTER_CS,
73 data: 0x0,
74 ..Default::default()
75 });
76 entries.push(kvm_msr_entry {
77 index: crate::msr_index::MSR_IA32_SYSENTER_ESP,
78 data: 0x0,
79 ..Default::default()
80 });
81 entries.push(kvm_msr_entry {
82 index: crate::msr_index::MSR_IA32_SYSENTER_EIP,
83 data: 0x0,
84 ..Default::default()
85 });
86 // x86_64 specific msrs, we only run on x86_64 not x86
87 entries.push(kvm_msr_entry {
88 index: crate::msr_index::MSR_STAR,
89 data: 0x0,
90 ..Default::default()
91 });
92 entries.push(kvm_msr_entry {
93 index: crate::msr_index::MSR_CSTAR,
94 data: 0x0,
95 ..Default::default()
96 });
97 entries.push(kvm_msr_entry {
98 index: crate::msr_index::MSR_KERNEL_GS_BASE,
99 data: 0x0,
100 ..Default::default()
101 });
102 entries.push(kvm_msr_entry {
103 index: crate::msr_index::MSR_SYSCALL_MASK,
104 data: 0x0,
105 ..Default::default()
106 });
107 entries.push(kvm_msr_entry {
108 index: crate::msr_index::MSR_LSTAR,
109 data: 0x0,
110 ..Default::default()
111 });
112 // end of x86_64 specific code
113 entries.push(kvm_msr_entry {
114 index: crate::msr_index::MSR_IA32_TSC,
115 data: 0x0,
116 ..Default::default()
117 });
118 entries.push(kvm_msr_entry {
119 index: crate::msr_index::MSR_IA32_MISC_ENABLE,
120 data: crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
121 ..Default::default()
122 });
123
124 entries
125 }
126
127 /// Configure Model specific registers for x86
128 ///
129 /// # Arguments
130 ///
131 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
pub fn setup_msrs(vcpu: &kvm::Vcpu) -> Result<()> {
    const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
    const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
    const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
    const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
    // The entries are laid out directly after the kvm_msrs header in one
    // allocation, so the header's alignment must satisfy the entries' too.
    const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);

    let entry_vec = create_msr_entries();
    // kvm_msrs ends in a flexible array member; reserve the header plus room
    // for every entry in a single zeroed allocation.
    let size = SIZE_OF_MSRS + entry_vec.len() * SIZE_OF_ENTRY;
    let layout = Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
    let mut allocation = LayoutAllocation::zeroed(layout);

    // Safe to obtain an exclusive reference because there are no other
    // references to the allocation yet and all-zero is a valid bit pattern.
    let msrs = unsafe { allocation.as_mut::<kvm_msrs>() };

    unsafe {
        // Mapping the unsized array to a slice is unsafe because the length isn't known. Providing
        // the length used to create the struct guarantees the entire slice is valid.
        let entries: &mut [kvm_msr_entry] = msrs.entries.as_mut_slice(entry_vec.len());
        entries.copy_from_slice(&entry_vec);
    }
    msrs.nmsrs = entry_vec.len() as u32;

    vcpu.set_msrs(msrs).map_err(Error::MsrIoctlFailed)?;

    Ok(())

    // msrs allocation is deallocated here when `allocation` drops.
}
162
163 /// Configure FPU registers for x86
164 ///
165 /// # Arguments
166 ///
167 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
setup_fpu(vcpu: &kvm::Vcpu) -> Result<()>168 pub fn setup_fpu(vcpu: &kvm::Vcpu) -> Result<()> {
169 let fpu: kvm_fpu = kvm_fpu {
170 fcw: 0x37f,
171 mxcsr: 0x1f80,
172 ..Default::default()
173 };
174
175 vcpu.set_fpu(&fpu).map_err(Error::FpuIoctlFailed)?;
176
177 Ok(())
178 }
179
180 /// Configure base registers for x86
181 ///
182 /// # Arguments
183 ///
184 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
185 /// * `boot_ip` - Starting instruction pointer.
186 /// * `boot_sp` - Starting stack pointer.
187 /// * `boot_si` - Must point to zero page address per Linux ABI.
setup_regs(vcpu: &kvm::Vcpu, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()>188 pub fn setup_regs(vcpu: &kvm::Vcpu, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()> {
189 let regs: kvm_regs = kvm_regs {
190 rflags: 0x0000000000000002u64,
191 rip: boot_ip,
192 rsp: boot_sp,
193 rbp: boot_sp,
194 rsi: boot_si,
195 ..Default::default()
196 };
197
198 vcpu.set_regs(®s).map_err(Error::SettingRegistersIoctl)?;
199
200 Ok(())
201 }
202
// Control-register and EFER bits needed to enter 64-bit mode.
const X86_CR0_PE: u64 = 0x1; // CR0.PE: protection enable
const X86_CR0_PG: u64 = 0x80000000; // CR0.PG: paging enable
const X86_CR4_PAE: u64 = 0x20; // CR4.PAE: physical address extension

const EFER_LME: u64 = 0x100; // EFER.LME: long mode enable
const EFER_LMA: u64 = 0x400; // EFER.LMA: long mode active

// Guest-physical addresses at which the boot GDT and IDT are written.
const BOOT_GDT_OFFSET: u64 = 0x500;
const BOOT_IDT_OFFSET: u64 = 0x520;

// Number of boot GDT entries (null, code, data, TSS).
const BOOT_GDT_MAX: usize = 4;
214
write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()>215 fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
216 let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
217 for (index, entry) in table.iter().enumerate() {
218 let addr = guest_mem
219 .checked_offset(boot_gdt_addr, (index * mem::size_of::<u64>()) as u64)
220 .ok_or(Error::WriteGDTFailure)?;
221 guest_mem
222 .write_obj_at_addr(*entry, addr)
223 .map_err(|_| Error::WriteGDTFailure)?;
224 }
225 Ok(())
226 }
227
write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()>228 fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
229 let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
230 guest_mem
231 .write_obj_at_addr(val, boot_idt_addr)
232 .map_err(|_| Error::WriteIDTFailure)
233 }
234
configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()>235 fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()> {
236 let gdt_table: [u64; BOOT_GDT_MAX as usize] = [
237 gdt::gdt_entry(0, 0, 0), // NULL
238 gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
239 gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
240 gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
241 ];
242
243 let code_seg = gdt::kvm_segment_from_gdt(gdt_table[1], 1);
244 let data_seg = gdt::kvm_segment_from_gdt(gdt_table[2], 2);
245 let tss_seg = gdt::kvm_segment_from_gdt(gdt_table[3], 3);
246
247 // Write segments
248 write_gdt_table(&gdt_table[..], mem)?;
249 sregs.gdt.base = BOOT_GDT_OFFSET as u64;
250 sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;
251
252 write_idt_value(0, mem)?;
253 sregs.idt.base = BOOT_IDT_OFFSET as u64;
254 sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;
255
256 sregs.cs = code_seg;
257 sregs.ds = data_seg;
258 sregs.es = data_seg;
259 sregs.fs = data_seg;
260 sregs.gs = data_seg;
261 sregs.ss = data_seg;
262 sregs.tr = tss_seg;
263
264 /* 64-bit protected mode */
265 sregs.cr0 |= X86_CR0_PE;
266 sregs.efer |= EFER_LME;
267
268 Ok(())
269 }
270
/// Builds a three-level identity mapping of the first 1GB of guest physical
/// memory using 2MB pages, then enables paging/long mode in `sregs`.
fn setup_page_tables(mem: &GuestMemory, sregs: &mut kvm_sregs) -> Result<()> {
    // Puts PML4 right after zero page but aligned to 4k.
    let boot_pml4_addr = GuestAddress(0x9000);
    let boot_pdpte_addr = GuestAddress(0xa000);
    let boot_pde_addr = GuestAddress(0xb000);

    // Entry covering VA [0..512GB); low bits 0x03 = present | writable.
    mem.write_obj_at_addr(boot_pdpte_addr.offset() as u64 | 0x03, boot_pml4_addr)
        .map_err(|_| Error::WritePML4Address)?;

    // Entry covering VA [0..1GB); same present | writable flags.
    mem.write_obj_at_addr(boot_pde_addr.offset() as u64 | 0x03, boot_pdpte_addr)
        .map_err(|_| Error::WritePDPTEAddress)?;

    // 512 2MB entries together covering VA [0..1GB). Note we are assuming
    // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
    // 0x83 = present | writable | page-size (2MB leaf); each PDE is 8 bytes.
    for i in 0..512 {
        mem.write_obj_at_addr((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8))
            .map_err(|_| Error::WritePDEAddress)?;
    }
    // Point CR3 at the PML4 and turn on PAE + paging.
    sregs.cr3 = boot_pml4_addr.offset() as u64;
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PG;
    sregs.efer |= EFER_LMA; // Long mode is active. Must be auto-enabled with CR0_PG.
    Ok(())
}
297
298 /// Configures the segment registers and system page tables for a given CPU.
299 ///
300 /// # Arguments
301 ///
302 /// * `mem` - The memory that will be passed to the guest.
303 /// * `vcpu_fd` - The FD returned from the KVM_CREATE_VCPU ioctl.
setup_sregs(mem: &GuestMemory, vcpu: &kvm::Vcpu) -> Result<()>304 pub fn setup_sregs(mem: &GuestMemory, vcpu: &kvm::Vcpu) -> Result<()> {
305 let mut sregs: kvm_sregs = vcpu.get_sregs().map_err(Error::GetSRegsIoctlFailed)?;
306
307 configure_segments_and_sregs(mem, &mut sregs)?;
308 setup_page_tables(mem, &mut sregs)?; // TODO(dgreid) - Can this be done once per system instead?
309
310 vcpu.set_sregs(&sregs).map_err(Error::SetSRegsIoctlFailed)?;
311
312 Ok(())
313 }
314
#[cfg(test)]
mod tests {
    use super::*;
    use sys_util::{GuestAddress, GuestMemory};

    // Creates a single 64 KiB guest memory region at physical address 0 —
    // large enough to hold the boot GDT/IDT (0x500/0x520) and the page
    // tables (0x9000..0xc000).
    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap()
    }

    // Reads back the u64 stored at `offset` in guest memory.
    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj_from_addr(read_addr).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        // Raw GDT entries (null, code, data, TSS) written into guest memory.
        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
        assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
        assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 24));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        // Segment registers decoded from those entries.
        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xfffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xfffff, sregs.tr.limit);
        assert_eq!(0, sregs.tr.avl);
        // Mode-enable bits set by configure_segments_and_sregs.
        assert_eq!(X86_CR0_PE, sregs.cr0);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

        // PML4 -> PDPTE -> PDE chain, each with present|writable (0x03).
        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        // 512 identity-mapped 2MB leaf entries (flags 0x83).
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        // Control registers reflect the new page-table root and paging bits.
        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0);
    }
}
371