// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::{mem, result};

use base::{self, warn};
use hypervisor::{Fpu, Register, Regs, Sregs, VcpuX86_64, Vm};
use remain::sorted;
use thiserror::Error;
use vm_memory::{GuestAddress, GuestMemory};

use crate::gdt;

#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    /// Failed to configure the FPU.
    #[error("failed to configure the FPU: {0}")]
    FpuIoctlFailed(base::Error),
    /// Failed to get sregs for this cpu.
    #[error("failed to get sregs for this cpu: {0}")]
    GetSRegsIoctlFailed(base::Error),
    /// Setting up msrs failed.
    #[error("setting up msrs failed: {0}")]
    MsrIoctlFailed(base::Error),
    /// Failed to set sregs for this cpu.
    #[error("failed to set sregs for this cpu: {0}")]
    SetSRegsIoctlFailed(base::Error),
    /// Failed to set base registers for this cpu.
    #[error("failed to set base registers for this cpu: {0}")]
    SettingRegistersIoctl(base::Error),
    /// Writing the GDT to RAM failed.
    #[error("writing the GDT to RAM failed")]
    WriteGDTFailure,
    /// Writing the IDT to RAM failed.
    #[error("writing the IDT to RAM failed")]
    WriteIDTFailure,
    /// Writing PDE to RAM failed.
    #[error("writing PDE to RAM failed")]
    WritePDEAddress,
    /// Writing PDPTE to RAM failed.
    #[error("writing PDPTE to RAM failed")]
    WritePDPTEAddress,
    /// Writing PML4 to RAM failed.
    #[error("writing PML4 to RAM failed")]
    WritePML4Address,
}

pub type Result<T> = result::Result<T, Error>;

const MTRR_MEMTYPE_UC: u8 = 0x0;
const MTRR_MEMTYPE_WB: u8 = 0x6;
const MTRR_VAR_VALID: u64 = 0x800; // Bit 11: the per-range "valid" bit in MTRRphysMaskN.
const MTRR_ENABLE: u64 = 0x800; // Bit 11: the "E" (enable) bit in MTRRdefType.
const MTRR_PHYS_BASE_MSR: u32 = 0x200;
const MTRR_PHYS_MASK_MSR: u32 = 0x201;
const VAR_MTRR_NUM_MASK: u64 = 0xFF;

// Returns the value of the highest set bit in a 64-bit value. Equivalent to
// 1 << HighBitSet(x).
fn get_power_of_two(data: u64) -> u64 {
    1 << (64 - data.leading_zeros() - 1)
}

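// For example, get_power_of_two(0x3000_0000) returns 0x2000_0000: bit 29 is
// the highest set bit, so the result is 1 << 29.
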
// Returns the maximum length suitable for an MTRR setting, based on the
// specified (base, len).
fn get_max_len(base: u64, len: u64) -> u64 {
    let mut ret = get_power_of_two(len);

    while base % ret != 0 {
        ret >>= 1;
    }

    ret
}

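// For example, get_max_len(0xd000_0000, 0x3000_0000) returns 0x1000_0000: the
// largest power of two that fits in the length is 0x2000_0000, but the base is
// not a multiple of it, so the candidate is halved once.
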
// For the specified (base, len), returns the list of (base, len) pairs that
// can be programmed into the MTRRs. The MTRRs require that a range's base
// address be aligned to at least the range's length.
fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
    let mut vecs = Vec::new();

    let mut remains = len;
    let mut new = base;
    while remains != 0 {
        let max = get_max_len(new, remains);
        vecs.push((new, max));
        remains -= max;
        new += max;
    }

    vecs
}

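// Worked example: a 768 MiB range at 0xd000_0000 cannot be covered by a single
// MTRR, because the base is only 256 MiB aligned. get_mtrr_pairs(0xd000_0000,
// 0x3000_0000) therefore yields [(0xd000_0000, 0x1000_0000),
// (0xe000_0000, 0x2000_0000)], each range naturally aligned.
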
fn append_mtrr_entries(
    vm: &dyn Vm,
    vcpu: &dyn VcpuX86_64,
    pci_start: u64,
    entries: &mut Vec<Register>,
) {
    // Get the number of variable-range MTRRs from MSR_MTRRcap.
    let mut msrs = vec![Register {
        id: crate::msr_index::MSR_MTRRcap,
        ..Default::default()
    }];
    if vcpu.get_msrs(&mut msrs).is_err() {
        warn!("failed to get MSRs; a guest with a pass-through device may be very slow");
        return;
    }
    let var_num = msrs[0].value & VAR_MTRR_NUM_MASK;

    // Set pci_start..4G as UC (uncacheable); everything else keeps the default
    // WB (write-back) type.
    let pci_len = (1 << 32) - pci_start;
    let vecs = get_mtrr_pairs(pci_start, pci_len);
    if vecs.len() as u64 > var_num {
        warn!(
            "not enough variable MTRRs for PCI MMIO; please check the pci_start \
             address. A guest with a pass-through device may be very slow"
        );
        return;
    }

    let phys_mask: u64 = (1 << vm.get_guest_phys_addr_bits()) - 1;
    for (idx, (base, len)) in vecs.iter().enumerate() {
        let reg_idx = idx as u32 * 2;
        entries.push(Register {
            id: MTRR_PHYS_BASE_MSR + reg_idx,
            value: base | MTRR_MEMTYPE_UC as u64,
        });
        let mask: u64 = (len.wrapping_neg() & phys_mask) | MTRR_VAR_VALID;
        entries.push(Register {
            id: MTRR_PHYS_MASK_MSR + reg_idx,
            value: mask,
        });
    }
    // Disable fixed MTRRs and enable variable MTRRs; set the default type to WB.
    entries.push(Register {
        id: crate::msr_index::MSR_MTRRdefType,
        value: MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
    });
}

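// As a concrete sketch of what append_mtrr_entries produces, assume
// pci_start = 0xc000_0000 and 36 guest physical address bits. The PCI hole is
// then one naturally aligned 1 GiB range, so a single variable-range pair is
// emitted:
//   MTRRphysBase0 (0x200) = 0x0000_0000_c000_0000  (base | UC)
//   MTRRphysMask0 (0x201) = 0x0000_000f_c000_0800  (-len & phys_mask | valid)
// followed by MTRRdefType = 0x806 (variable MTRRs enabled, default type WB).
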
fn create_msr_entries(vm: &dyn Vm, vcpu: &dyn VcpuX86_64, pci_start: u64) -> Vec<Register> {
    let mut entries = vec![
        Register {
            id: crate::msr_index::MSR_IA32_SYSENTER_CS,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_IA32_SYSENTER_ESP,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_IA32_SYSENTER_EIP,
            value: 0x0,
        },
        // x86_64-specific MSRs; we only run on x86_64, not x86.
        Register {
            id: crate::msr_index::MSR_STAR,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_CSTAR,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_KERNEL_GS_BASE,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_SYSCALL_MASK,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_LSTAR,
            value: 0x0,
        },
        // End of the x86_64-specific MSRs.
        Register {
            id: crate::msr_index::MSR_IA32_TSC,
            value: 0x0,
        },
        Register {
            id: crate::msr_index::MSR_IA32_MISC_ENABLE,
            value: crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
        },
    ];
    append_mtrr_entries(vm, vcpu, pci_start, &mut entries);
    entries
}

/// Configures model-specific registers (MSRs) for x86.
///
/// # Arguments
///
/// * `vm` - The VM, used to query the guest physical address size for MTRR masks.
/// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
/// * `pci_start` - Start of the PCI MMIO region, which is marked uncacheable.
pub fn setup_msrs(vm: &dyn Vm, vcpu: &dyn VcpuX86_64, pci_start: u64) -> Result<()> {
    let msrs = create_msr_entries(vm, vcpu, pci_start);
    vcpu.set_msrs(&msrs).map_err(Error::MsrIoctlFailed)
}

/// Configures FPU registers for x86.
///
/// # Arguments
///
/// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
pub fn setup_fpu(vcpu: &dyn VcpuX86_64) -> Result<()> {
    let fpu = Fpu {
        fcw: 0x37f,    // x87 control word: the power-on default (all exceptions masked).
        mxcsr: 0x1f80, // SSE control/status: the power-on default (all exceptions masked).
        ..Default::default()
    };

    vcpu.set_fpu(&fpu).map_err(Error::FpuIoctlFailed)
}

/// Configures base registers for x86.
///
/// # Arguments
///
/// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
/// * `boot_ip` - Starting instruction pointer.
/// * `boot_sp` - Starting stack pointer.
/// * `boot_si` - Must point to the zero page address per the Linux ABI.
pub fn setup_regs(vcpu: &dyn VcpuX86_64, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()> {
    let regs = Regs {
        rflags: 0x0000000000000002u64, // Bit 1 of RFLAGS is reserved and must be set.
        rip: boot_ip,
        rsp: boot_sp,
        rbp: boot_sp,
        rsi: boot_si,
        ..Default::default()
    };

    vcpu.set_regs(&regs).map_err(Error::SettingRegistersIoctl)
}

const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
const X86_CR4_PAE: u64 = 0x20;

const EFER_LME: u64 = 0x100;
const EFER_LMA: u64 = 0x400;

const BOOT_GDT_OFFSET: u64 = 0x1500;
const BOOT_IDT_OFFSET: u64 = 0x1520;

const BOOT_GDT_MAX: usize = 4;

fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
    let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
    for (index, entry) in table.iter().enumerate() {
        let addr = guest_mem
            .checked_offset(boot_gdt_addr, (index * mem::size_of::<u64>()) as u64)
            .ok_or(Error::WriteGDTFailure)?;
        guest_mem
            .write_obj_at_addr(*entry, addr)
            .map_err(|_| Error::WriteGDTFailure)?;
    }
    Ok(())
}

fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
    let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
    guest_mem
        .write_obj_at_addr(val, boot_idt_addr)
        .map_err(|_| Error::WriteIDTFailure)
}

fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    let gdt_table: [u64; BOOT_GDT_MAX] = [
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE: present, execute/read, 64-bit (L=1)
        gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA: present, read/write
        gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS: present, 64-bit busy TSS
    ];

    let code_seg = gdt::segment_from_gdt(gdt_table[1], 1);
    let data_seg = gdt::segment_from_gdt(gdt_table[2], 2);
    let tss_seg = gdt::segment_from_gdt(gdt_table[3], 3);

    // Write segments
    write_gdt_table(&gdt_table[..], mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    // 64-bit protected mode: enable protection (CR0.PE) and long mode (EFER.LME).
    sregs.cr0 |= X86_CR0_PE;
    sregs.efer |= EFER_LME;

    Ok(())
}

fn setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // Puts the PML4 right after the zero page, aligned to 4k.
    let boot_pml4_addr = GuestAddress(0x9000);
    let boot_pdpte_addr = GuestAddress(0xa000);
    let boot_pde_addr = GuestAddress(0xb000);

    // Entry covering VA [0..512GB); 0x03 = present | writable.
    mem.write_obj_at_addr(boot_pdpte_addr.offset() as u64 | 0x03, boot_pml4_addr)
        .map_err(|_| Error::WritePML4Address)?;

    // Entry covering VA [0..1GB); 0x03 = present | writable.
    mem.write_obj_at_addr(boot_pde_addr.offset() as u64 | 0x03, boot_pdpte_addr)
        .map_err(|_| Error::WritePDPTEAddress)?;

    // 512 2MB entries together covering VA [0..1GB); 0x83 = present | writable |
    // page-size (2MB). Note we are assuming the CPU supports 2MB pages
    // (/proc/cpuinfo has 'pse'). All modern CPUs do.
    for i in 0..512 {
        mem.write_obj_at_addr((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8))
            .map_err(|_| Error::WritePDEAddress)?;
    }
    sregs.cr3 = boot_pml4_addr.offset() as u64;
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PG;
    sregs.efer |= EFER_LMA; // Long mode is active; must be set together with CR0.PG.
    Ok(())
}

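// The resulting identity map of the first 1 GiB looks like this (the values
// match the page_tables test below):
//   0x9000 (PML4): entry 0 = 0xa003 (PDPT at 0xa000 | present | writable)
//   0xa000 (PDPT): entry 0 = 0xb003 (PD at 0xb000 | present | writable)
//   0xb000 (PD):   entry i = (i << 21) | 0x83 (2 MiB page | present | writable)
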
/// Configures the segment registers and system page tables for a given CPU.
///
/// # Arguments
///
/// * `mem` - The memory that will be passed to the guest.
/// * `vcpu` - The VCPU to configure registers on.
pub fn setup_sregs(mem: &GuestMemory, vcpu: &dyn VcpuX86_64) -> Result<()> {
    let mut sregs = vcpu.get_sregs().map_err(Error::GetSRegsIoctlFailed)?;

    configure_segments_and_sregs(mem, &mut sregs)?;
    setup_page_tables(mem, &mut sregs)?; // TODO(dgreid) - Can this be done once per system instead?

    vcpu.set_sregs(&sregs).map_err(Error::SetSRegsIoctlFailed)?;

    Ok(())
}

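// Putting these together: a typical 64-bit boot path calls the setup functions
// above in sequence. A minimal sketch (the variable names and PCI start
// address are hypothetical):
//
//   setup_msrs(&vm, &vcpu, 0xc000_0000)?;
//   setup_fpu(&vcpu)?;
//   setup_regs(&vcpu, kernel_entry, stack_top, zero_page_addr)?;
//   setup_sregs(&guest_mem, &vcpu)?;
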
#[cfg(test)]
mod tests {
    use super::*;
    use vm_memory::{GuestAddress, GuestMemory};

    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap()
    }

    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj_from_addr(read_addr).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
        assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
        assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 24));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xfffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xfffff, sregs.tr.limit);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0);
    }
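
    // Sketch of an extra check for the MTRR range-splitting helper: a 768 MiB
    // range whose base is only 256 MiB aligned must be split into two
    // naturally-aligned ranges.
    #[test]
    fn mtrr_pairs_split_unaligned_base() {
        let pairs = get_mtrr_pairs(0xd000_0000, 0x3000_0000);
        assert_eq!(
            pairs,
            vec![(0xd000_0000, 0x1000_0000), (0xe000_0000, 0x2000_0000)]
        );
    }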
}