1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::mem;
6 use std::result;
7
8 use base::warn;
9 use hypervisor::Register;
10 use hypervisor::Sregs;
11 use hypervisor::VcpuX86_64;
12 use hypervisor::Vm;
13 use remain::sorted;
14 use thiserror::Error;
15 use vm_memory::GuestAddress;
16 use vm_memory::GuestMemory;
17
18 use crate::gdt;
19
// Errors for this module; the `#[sorted]` attribute enforces that variants
// stay in alphabetical order.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    /// Failed to get sregs for this cpu.
    #[error("failed to get sregs for this cpu: {0}")]
    GetSRegsIoctlFailed(base::Error),
    /// Failed to get base registers for this cpu.
    #[error("failed to get base registers for this cpu: {0}")]
    GettingRegistersIoctl(base::Error),
    /// Failed to set sregs for this cpu.
    #[error("failed to set sregs for this cpu: {0}")]
    SetSRegsIoctlFailed(base::Error),
    /// Failed to set base registers for this cpu.
    #[error("failed to set base registers for this cpu: {0}")]
    SettingRegistersIoctl(base::Error),
    /// Writing the GDT to RAM failed.
    #[error("writing the GDT to RAM failed")]
    WriteGDTFailure,
    /// Writing the IDT to RAM failed.
    #[error("writing the IDT to RAM failed")]
    WriteIDTFailure,
    /// Writing PDE to RAM failed.
    #[error("writing PDE to RAM failed")]
    WritePDEAddress,
    /// Writing PDPTE to RAM failed.
    #[error("writing PDPTE to RAM failed")]
    WritePDPTEAddress,
    /// Writing PML4 to RAM failed.
    #[error("writing PML4 to RAM failed")]
    WritePML4Address,
}
51
/// Result type with this module's [`Error`].
pub type Result<T> = result::Result<T, Error>;

// MTRR memory types (per Intel SDM vol. 3A): uncacheable and write-back.
const MTRR_MEMTYPE_UC: u8 = 0x0;
const MTRR_MEMTYPE_WB: u8 = 0x6;
// Bit 11 ("V", valid) of an IA32_MTRR_PHYSMASKn register.
const MTRR_VAR_VALID: u64 = 0x800;
// Bit 11 ("E", MTRR enable) of IA32_MTRR_DEF_TYPE — same bit position as
// MTRR_VAR_VALID, but in a different register.
const MTRR_ENABLE: u64 = 0x800;
// MSR ids of the first variable-MTRR base/mask pair
// (IA32_MTRR_PHYSBASE0 / IA32_MTRR_PHYSMASK0).
const MTRR_PHYS_BASE_MSR: u32 = 0x200;
const MTRR_PHYS_MASK_MSR: u32 = 0x201;
// Mask for the VCNT field (low byte) of IA32_MTRRCAP: the number of
// variable MTRRs the CPU supports.
const VAR_MTRR_NUM_MASK: u64 = 0xFF;
61
// Returns the largest power of two that is <= `data`, i.e. a value with only
// the highest set bit of `data` remaining. Equivalent to 1 << HighBitSet(x).
// `data` must be non-zero.
fn get_power_of_two(data: u64) -> u64 {
    let high_bit = 63 - data.leading_zeros();
    1u64 << high_bit
}
67
68 // Returns the max length which suitable for mtrr setting based on the
69 // specified (base, len)
get_max_len(base: u64, len: u64) -> u6470 fn get_max_len(base: u64, len: u64) -> u64 {
71 let mut ret = get_power_of_two(len);
72
73 while base % ret != 0 {
74 ret >>= 1;
75 }
76
77 ret
78 }
79
80 // For the specified (Base, Len), returns (base, len) pair which could be
81 // set into mtrr register. mtrr requires: the base-address alignment value can't be
82 // less than its length
get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)>83 fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
84 let mut vecs = Vec::new();
85
86 let mut remains = len;
87 let mut new = base;
88 while remains != 0 {
89 let max = get_max_len(new, remains);
90 vecs.push((new, max));
91 remains -= max;
92 new += max;
93 }
94
95 vecs
96 }
97
98 /// Returns the number of variable MTRR entries supported by `vcpu`.
vcpu_supported_variable_mtrrs(vcpu: &dyn VcpuX86_64) -> usize99 pub fn vcpu_supported_variable_mtrrs(vcpu: &dyn VcpuX86_64) -> usize {
100 // Get VAR MTRR num from MSR_MTRRcap
101 let mut msrs = vec![Register {
102 id: crate::msr_index::MSR_MTRRcap,
103 ..Default::default()
104 }];
105 if vcpu.get_msrs(&mut msrs).is_err() {
106 warn!("get msrs fail, guest with pass through device may be very slow");
107 0
108 } else {
109 (msrs[0].value & VAR_MTRR_NUM_MASK) as usize
110 }
111 }
112
113 /// Returns `true` if the given MSR `id` is a MTRR entry.
is_mtrr_msr(id: u32) -> bool114 pub fn is_mtrr_msr(id: u32) -> bool {
115 // Variable MTRR MSRs are pairs starting at 0x200 (MTRR_PHYS_BASE_MSR) / 0x201
116 // (MTRR_PHYS_MASK_MSR) and extending up to 0xFF pairs at most.
117 (id >= MTRR_PHYS_BASE_MSR && id <= MTRR_PHYS_BASE_MSR + 2 * VAR_MTRR_NUM_MASK as u32)
118 || id == crate::msr_index::MSR_MTRRdefType
119 }
120
121 /// Returns the count of variable MTRR entries specified by the list of `msrs`.
count_variable_mtrrs(msrs: &[Register]) -> usize122 pub fn count_variable_mtrrs(msrs: &[Register]) -> usize {
123 // Each variable MTRR takes up two MSRs (base + mask), so divide by 2. This will also count the
124 // MTRRdefType entry, but that is only one extra and the division truncates, so it won't affect
125 // the final count.
126 msrs.iter().filter(|msr| is_mtrr_msr(msr.id)).count() / 2
127 }
128
129 /// Returns a set of MSRs containing the MTRR configuration.
mtrr_msrs(vm: &dyn Vm, pci_start: u64) -> Vec<Register>130 pub fn mtrr_msrs(vm: &dyn Vm, pci_start: u64) -> Vec<Register> {
131 // Set pci_start .. 4G as UC
132 // all others are set to default WB
133 let pci_len = (1 << 32) - pci_start;
134 let vecs = get_mtrr_pairs(pci_start, pci_len);
135
136 let mut entries = Vec::new();
137
138 let phys_mask: u64 = (1 << vm.get_guest_phys_addr_bits()) - 1;
139 for (idx, (base, len)) in vecs.iter().enumerate() {
140 let reg_idx = idx as u32 * 2;
141 entries.push(Register {
142 id: MTRR_PHYS_BASE_MSR + reg_idx,
143 value: base | MTRR_MEMTYPE_UC as u64,
144 });
145 let mask: u64 = len.wrapping_neg() & phys_mask | MTRR_VAR_VALID;
146 entries.push(Register {
147 id: MTRR_PHYS_MASK_MSR + reg_idx,
148 value: mask,
149 });
150 }
151 // Disable fixed MTRRs and enable variable MTRRs, set default type as WB
152 entries.push(Register {
153 id: crate::msr_index::MSR_MTRRdefType,
154 value: MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
155 });
156 entries
157 }
158
159 /// Returns the default value of MSRs at reset.
160 ///
161 /// Currently only sets IA32_TSC to 0.
default_msrs() -> Vec<Register>162 pub fn default_msrs() -> Vec<Register> {
163 vec![
164 Register {
165 id: crate::msr_index::MSR_IA32_TSC,
166 value: 0x0,
167 },
168 Register {
169 id: crate::msr_index::MSR_IA32_MISC_ENABLE,
170 value: crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
171 },
172 ]
173 }
174
175 /// Configure Model specific registers for long (64-bit) mode.
long_mode_msrs() -> Vec<Register>176 pub fn long_mode_msrs() -> Vec<Register> {
177 vec![
178 Register {
179 id: crate::msr_index::MSR_IA32_SYSENTER_CS,
180 value: 0x0,
181 },
182 Register {
183 id: crate::msr_index::MSR_IA32_SYSENTER_ESP,
184 value: 0x0,
185 },
186 Register {
187 id: crate::msr_index::MSR_IA32_SYSENTER_EIP,
188 value: 0x0,
189 },
190 // x86_64 specific msrs, we only run on x86_64 not x86
191 Register {
192 id: crate::msr_index::MSR_STAR,
193 value: 0x0,
194 },
195 Register {
196 id: crate::msr_index::MSR_CSTAR,
197 value: 0x0,
198 },
199 Register {
200 id: crate::msr_index::MSR_KERNEL_GS_BASE,
201 value: 0x0,
202 },
203 Register {
204 id: crate::msr_index::MSR_SYSCALL_MASK,
205 value: 0x0,
206 },
207 Register {
208 id: crate::msr_index::MSR_LSTAR,
209 value: 0x0,
210 },
211 // end of x86_64 specific code
212 Register {
213 id: crate::msr_index::MSR_IA32_TSC,
214 value: 0x0,
215 },
216 Register {
217 id: crate::msr_index::MSR_IA32_MISC_ENABLE,
218 value: crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
219 },
220 ]
221 }
222
// CR0 bits: protection enable and paging enable.
const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
// CR4 physical address extension bit, required for long mode.
const X86_CR4_PAE: u64 = 0x20;

// IA32_EFER bits: long mode enable and long mode active.
const EFER_LME: u64 = 0x100;
const EFER_LMA: u64 = 0x400;

// Guest-physical addresses where the boot GDT and IDT are written.
const BOOT_GDT_OFFSET: u64 = 0x1500;
const BOOT_IDT_OFFSET: u64 = 0x1520;

// Number of entries in the boot GDT (null, code, data, TSS).
const BOOT_GDT_MAX: usize = 4;
234
write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()>235 fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
236 let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
237 for (index, entry) in table.iter().enumerate() {
238 let addr = boot_gdt_addr
239 .checked_add((index * mem::size_of::<u64>()) as u64)
240 .ok_or(Error::WriteGDTFailure)?;
241 if !guest_mem.is_valid_range(addr, mem::size_of::<u64>() as u64) {
242 return Err(Error::WriteGDTFailure);
243 }
244
245 guest_mem
246 .write_obj_at_addr(*entry, addr)
247 .map_err(|_| Error::WriteGDTFailure)?;
248 }
249 Ok(())
250 }
251
write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()>252 fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
253 let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
254 guest_mem
255 .write_obj_at_addr(val, boot_idt_addr)
256 .map_err(|_| Error::WriteIDTFailure)
257 }
258
259 /// Configures the GDT, IDT, and segment registers for long mode.
configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()>260 pub fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
261 let gdt_table: [u64; BOOT_GDT_MAX as usize] = [
262 gdt::gdt_entry(0, 0, 0), // NULL
263 gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
264 gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
265 gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
266 ];
267
268 let code_seg = gdt::segment_from_gdt(gdt_table[1], 1);
269 let data_seg = gdt::segment_from_gdt(gdt_table[2], 2);
270 let tss_seg = gdt::segment_from_gdt(gdt_table[3], 3);
271
272 // Write segments
273 write_gdt_table(&gdt_table[..], mem)?;
274 sregs.gdt.base = BOOT_GDT_OFFSET as u64;
275 sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;
276
277 write_idt_value(0, mem)?;
278 sregs.idt.base = BOOT_IDT_OFFSET as u64;
279 sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;
280
281 sregs.cs = code_seg;
282 sregs.ds = data_seg;
283 sregs.es = data_seg;
284 sregs.fs = data_seg;
285 sregs.gs = data_seg;
286 sregs.ss = data_seg;
287 sregs.tr = tss_seg;
288
289 /* 64-bit protected mode */
290 sregs.cr0 |= X86_CR0_PE;
291 sregs.efer |= EFER_LME;
292
293 Ok(())
294 }
295
296 /// Configures the system page tables and control registers for long mode with paging.
setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()>297 pub fn setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
298 // Puts PML4 right after zero page but aligned to 4k.
299 let boot_pml4_addr = GuestAddress(0x9000);
300 let boot_pdpte_addr = GuestAddress(0xa000);
301 let boot_pde_addr = GuestAddress(0xb000);
302
303 // Entry covering VA [0..512GB)
304 mem.write_obj_at_addr(boot_pdpte_addr.offset() as u64 | 0x03, boot_pml4_addr)
305 .map_err(|_| Error::WritePML4Address)?;
306
307 // Entry covering VA [0..1GB)
308 mem.write_obj_at_addr(boot_pde_addr.offset() as u64 | 0x03, boot_pdpte_addr)
309 .map_err(|_| Error::WritePDPTEAddress)?;
310
311 // 512 2MB entries together covering VA [0..1GB). Note we are assuming
312 // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
313 for i in 0..512 {
314 mem.write_obj_at_addr((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8))
315 .map_err(|_| Error::WritePDEAddress)?;
316 }
317 sregs.cr3 = boot_pml4_addr.offset() as u64;
318 sregs.cr4 |= X86_CR4_PAE;
319 sregs.cr0 |= X86_CR0_PG;
320 sregs.efer |= EFER_LMA; // Long mode is active. Must be auto-enabled with CR0_PG.
321 Ok(())
322 }
323
#[cfg(test)]
mod tests {
    use vm_memory::GuestAddress;
    use vm_memory::GuestMemory;

    use super::*;

    // 64 KiB of guest RAM at physical address 0 — enough for the boot
    // GDT/IDT and the identity-map page tables.
    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap()
    }

    // Reads back one u64 from guest memory at `offset`.
    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        gm.read_obj_from_addr(GuestAddress(offset)).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let gm = create_guest_mem();
        let mut sregs = Default::default();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        // The four GDT entries written into guest memory, in table order:
        // NULL, CODE, DATA, TSS.
        let expected_gdt: [u64; 4] = [
            0x0,
            0xaf9b000000ffff,
            0xcf93000000ffff,
            0x8f8b000000ffff,
        ];
        for (i, expected) in expected_gdt.iter().enumerate() {
            assert_eq!(*expected, read_u64(&gm, BOOT_GDT_OFFSET + 8 * i as u64));
        }
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        // Spot-check the segment registers derived from the GDT.
        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xfffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xfffff, sregs.tr.limit);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0 & X86_CR0_PE);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let gm = create_guest_mem();
        let mut sregs = Default::default();
        setup_page_tables(&gm, &mut sregs).unwrap();

        // PML4 -> PDPTE -> PDE chain, then the 512 2MB mappings.
        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0 & X86_CR0_PG);
    }
}
382