// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::alloc::Layout;
use std::cell::Cell;
use std::cell::RefCell;
use std::cmp;
use std::cmp::min;
use std::cmp::Ord;
use std::cmp::PartialEq;
use std::cmp::PartialOrd;
use std::collections::btree_set::BTreeSet;
use std::io::Read;
use std::io::Write;
use std::mem;
use std::sync::Arc;
use std::sync::RwLock;

use base::error;
use base::LayoutAllocation;
use data_model::DataInit;
use kvm::CpuId;
use kvm::Vcpu;
use kvm_sys::kvm_debugregs;
use kvm_sys::kvm_enable_cap;
use kvm_sys::kvm_fpu;
use kvm_sys::kvm_lapic_state;
use kvm_sys::kvm_mp_state;
use kvm_sys::kvm_msr_entry;
use kvm_sys::kvm_msrs;
use kvm_sys::kvm_regs;
use kvm_sys::kvm_sregs;
use kvm_sys::kvm_vcpu_events;
use kvm_sys::kvm_xcrs;
use kvm_sys::KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
use libc::EINVAL;
use libc::ENOENT;
use libc::ENOTTY;
use libc::EPERM;
use libc::EPIPE;
use libc::EPROTO;
use protobuf::CodedOutputStream;
use protobuf::Message;
use protos::plugin::*;
use static_assertions::const_assert;
use sync::Mutex;

use super::*;

/// Identifier for an address space in the VM.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum IoSpace {
    Ioport,
    Mmio,
}

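/// A reserved address range: `Range(start, length, async_write)`.
///
/// Note that equality and ordering consider only the start address, so a `Range` can be looked
/// up in a `BTreeSet` by its start alone (with dummy values for the other fields).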
#[derive(Debug, Copy, Clone)]
struct Range(u64, u64, bool);

impl Eq for Range {}

impl PartialEq for Range {
    fn eq(&self, other: &Range) -> bool {
        self.0 == other.0
    }
}

impl Ord for Range {
    fn cmp(&self, other: &Range) -> cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

impl PartialOrd for Range {
    fn partial_cmp(&self, other: &Range) -> Option<cmp::Ordering> {
        self.0.partial_cmp(&other.0)
    }
}

// Wrapper types to make the kvm register structs DataInit
#[derive(Copy, Clone)]
struct VcpuRegs(kvm_regs);
unsafe impl DataInit for VcpuRegs {}
#[derive(Copy, Clone)]
struct VcpuSregs(kvm_sregs);
unsafe impl DataInit for VcpuSregs {}
#[derive(Copy, Clone)]
struct VcpuFpu(kvm_fpu);
unsafe impl DataInit for VcpuFpu {}
#[derive(Copy, Clone)]
struct VcpuDebugregs(kvm_debugregs);
unsafe impl DataInit for VcpuDebugregs {}
#[derive(Copy, Clone)]
struct VcpuXcregs(kvm_xcrs);
unsafe impl DataInit for VcpuXcregs {}
#[derive(Copy, Clone)]
struct VcpuLapicState(kvm_lapic_state);
unsafe impl DataInit for VcpuLapicState {}
#[derive(Copy, Clone)]
struct VcpuMpState(kvm_mp_state);
unsafe impl DataInit for VcpuMpState {}
#[derive(Copy, Clone)]
struct VcpuEvents(kvm_vcpu_events);
unsafe impl DataInit for VcpuEvents {}
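// As an illustrative sketch (mirroring how these wrappers are used in `get_vcpu_state` and
// `set_vcpu_state` below), `DataInit` lets a register struct round-trip through a byte slice:
//
//     let bytes = VcpuRegs(vcpu.get_regs()?).as_slice().to_vec();
//     let regs = VcpuRegs::from_slice(&bytes).ok_or(SysError::new(EINVAL))?.0;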

fn get_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet) -> SysResult<Vec<u8>> {
    Ok(match state_set {
        VcpuRequest_StateSet::REGS => VcpuRegs(vcpu.get_regs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::SREGS => VcpuSregs(vcpu.get_sregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::FPU => VcpuFpu(vcpu.get_fpu()?).as_slice().to_vec(),
        VcpuRequest_StateSet::DEBUGREGS => VcpuDebugregs(vcpu.get_debugregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::XCREGS => VcpuXcregs(vcpu.get_xcrs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::LAPIC => VcpuLapicState(vcpu.get_lapic()?).as_slice().to_vec(),
        VcpuRequest_StateSet::MP => VcpuMpState(vcpu.get_mp_state()?).as_slice().to_vec(),
        VcpuRequest_StateSet::EVENTS => VcpuEvents(vcpu.get_vcpu_events()?).as_slice().to_vec(),
    })
}

fn set_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet, state: &[u8]) -> SysResult<()> {
    match state_set {
        VcpuRequest_StateSet::REGS => {
            vcpu.set_regs(&VcpuRegs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::SREGS => {
            vcpu.set_sregs(&VcpuSregs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::FPU => {
            vcpu.set_fpu(&VcpuFpu::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::DEBUGREGS => vcpu.set_debugregs(
            &VcpuDebugregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::XCREGS => vcpu.set_xcrs(
            &VcpuXcregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::LAPIC => vcpu.set_lapic(
            &VcpuLapicState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::MP => vcpu.set_mp_state(
            &VcpuMpState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::EVENTS => vcpu.set_vcpu_events(
            &VcpuEvents::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
    }
}

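/// Register-matching criteria for a call hint.
///
/// Each `match_*` flag enables comparison of the corresponding guest register against the given
/// value; `send_sregs` and `send_debugregs` select which additional state is sent to the plugin
/// when the entry matches (see `SharedVcpuState::check_hint_details`).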
pub struct CallHintDetails {
    pub match_rax: bool,
    pub match_rbx: bool,
    pub match_rcx: bool,
    pub match_rdx: bool,
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub send_sregs: bool,
    pub send_debugregs: bool,
}

pub struct CallHint {
    io_space: IoSpace,
    addr: u64,
    on_write: bool,
    regs: Vec<CallHintDetails>,
}

/// State shared by every VCPU, grouped together to make edits to the state coherent across VCPUs.
#[derive(Default)]
pub struct SharedVcpuState {
    ioport_regions: BTreeSet<Range>,
    mmio_regions: BTreeSet<Range>,
    hint: Option<CallHint>,
}

impl SharedVcpuState {
    /// Reserves the given range for handling by the plugin process.
    ///
    /// This will reject any reservation that overlaps with an existing reservation.
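    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative, not from the original source):
    ///
    /// ```ignore
    /// let mut state = SharedVcpuState::default();
    /// // Reserve the 8-byte serial range at 0x3f8; an overlapping reservation fails.
    /// state.reserve_range(IoSpace::Ioport, 0x3f8, 8, false).unwrap();
    /// assert!(state.reserve_range(IoSpace::Ioport, 0x3f8, 1, false).is_err());
    /// state.unreserve_range(IoSpace::Ioport, 0x3f8).unwrap();
    /// ```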
    pub fn reserve_range(
        &mut self,
        space: IoSpace,
        start: u64,
        length: u64,
        async_write: bool,
    ) -> SysResult<()> {
        if length == 0 {
            return Err(SysError::new(EINVAL));
        }

        // Reject all cases where this reservation is part of another reservation.
        if self.is_reserved(space, start) {
            return Err(SysError::new(EPERM));
        }

        let last_address = match start.checked_add(length) {
            Some(end) => end - 1,
            None => return Err(SysError::new(EINVAL)),
        };

        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };

        match space
            .range(..Range(last_address, 0, false))
            .next_back()
            .cloned()
        {
            Some(Range(existing_start, _, _)) if existing_start >= start => {
                Err(SysError::new(EPERM))
            }
            _ => {
                space.insert(Range(start, length, async_write));
                Ok(())
            }
        }
    }

    /// Releases a reservation previously made at `start` in the given `space`.
    pub fn unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()> {
        let range = Range(start, 0, false);
        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };
        if space.remove(&range) {
            Ok(())
        } else {
            Err(SysError::new(ENOENT))
        }
    }

    pub fn set_hint(
        &mut self,
        space: IoSpace,
        addr: u64,
        on_write: bool,
        regs: Vec<CallHintDetails>,
    ) {
        if addr == 0 {
            self.hint = None;
        } else {
            let hint = CallHint {
                io_space: space,
                addr,
                on_write,
                regs,
            };
            self.hint = Some(hint);
        }
    }

    fn is_reserved(&self, space: IoSpace, addr: u64) -> bool {
        if let Some(Range(start, len, _)) = self.first_before(space, addr) {
            let offset = addr - start;
            if offset < len {
                return true;
            }
        }
        false
    }

    fn first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range> {
        let space = match io_space {
            IoSpace::Ioport => &self.ioport_regions,
            IoSpace::Mmio => &self.mmio_regions,
        };

        match addr.checked_add(1) {
            Some(next_addr) => space
                .range(..Range(next_addr, 0, false))
                .next_back()
                .cloned(),
            None => None,
        }
    }

    fn matches_hint(&self, io_space: IoSpace, addr: u64, is_write: bool) -> bool {
        if let Some(hint) = &self.hint {
            return io_space == hint.io_space && addr == hint.addr && is_write == hint.on_write;
        }
        false
    }

    fn check_hint_details(&self, regs: &kvm_regs) -> (bool, bool) {
        if let Some(hint) = &self.hint {
            for entry in hint.regs.iter() {
                if (!entry.match_rax || entry.rax == regs.rax)
                    && (!entry.match_rbx || entry.rbx == regs.rbx)
                    && (!entry.match_rcx || entry.rcx == regs.rcx)
                    && (!entry.match_rdx || entry.rdx == regs.rdx)
                {
                    return (entry.send_sregs, entry.send_debugregs);
                }
            }
        }
        (false, false)
    }
}

/// State specific to a VCPU, grouped so that each `PluginVcpu` object will share a canonical
/// version.
#[derive(Default)]
pub struct PerVcpuState {
    pause_request: Option<u64>,
}

impl PerVcpuState {
    /// Indicates that a VCPU should wait until the plugin process resumes the VCPU.
    ///
    /// This method will not cause a VCPU to pause immediately. Instead, the VCPU thread will
    /// continue running until it is interrupted, at which point it will check for a pending pause.
    /// If there is another call to `request_pause` for this VCPU before that happens, the last
    /// pause request's `data` will be overwritten with the most recent `data`.
    ///
    /// To get an immediate pause after calling `request_pause`, send a signal (with a registered
    /// handler) to the thread handling the VCPU corresponding to this state. This should interrupt
    /// the running VCPU, which should check for a pause with `PluginVcpu::pre_run`.
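    ///
    /// A minimal usage sketch (illustrative, not from the original source):
    ///
    /// ```ignore
    /// per_vcpu_state.lock().request_pause(0);
    /// // Now signal the VCPU thread so it exits the VM and reaches `PluginVcpu::pre_run`.
    /// ```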
    pub fn request_pause(&mut self, data: u64) {
        self.pause_request = Some(data);
    }
}

enum VcpuRunData<'a> {
    Read(&'a mut [u8]),
    Write(&'a [u8]),
}

impl<'a> VcpuRunData<'a> {
    fn is_write(&self) -> bool {
        matches!(self, VcpuRunData::Write(_))
    }

    fn as_slice(&self) -> &[u8] {
        match self {
            VcpuRunData::Read(s) => s,
            VcpuRunData::Write(s) => s,
        }
    }

    fn copy_from_slice(&mut self, data: &[u8]) {
        if let VcpuRunData::Read(s) = self {
            let copy_size = min(s.len(), data.len());
            // Copy only the overlapping prefix; `copy_from_slice` panics unless the source
            // and destination lengths are equal.
            s[..copy_size].copy_from_slice(&data[..copy_size]);
        }
    }
}

/// State object for a VCPU's connection with the plugin process.
///
/// This is used by a VCPU thread to allow the plugin process to handle vmexits. Each method may
/// block indefinitely while the plugin process is handling requests. In order to shut down cleanly
/// during these blocking calls, the `connection` socket should be shut down, which will end the
/// blocking calls.
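///
/// A hedged sketch of the expected call sequence from a VCPU thread (illustrative; the run loop
/// and vmexit dispatch live outside this module):
///
/// ```ignore
/// plugin_vcpu.init(&vcpu)?; // once per VCPU, before any VCPU in the VM runs
/// loop {
///     plugin_vcpu.pre_run(&vcpu)?; // handle any pending pause request
///     // Run the VCPU, then route IO/MMIO vmexits to io_read/io_write/mmio_read/mmio_write.
/// }
/// ```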
pub struct PluginVcpu {
    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
    per_vcpu_state: Arc<Mutex<PerVcpuState>>,
    read_pipe: File,
    write_pipe: File,
    wait_reason: Cell<Option<VcpuResponse_Wait>>,
    request_buffer: RefCell<Vec<u8>>,
    response_buffer: RefCell<Vec<u8>>,
}

impl PluginVcpu {
    /// Creates the plugin state and connection container for a VCPU thread.
    pub fn new(
        shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
        per_vcpu_state: Arc<Mutex<PerVcpuState>>,
        read_pipe: File,
        write_pipe: File,
    ) -> PluginVcpu {
        PluginVcpu {
            shared_vcpu_state,
            per_vcpu_state,
            read_pipe,
            write_pipe,
            wait_reason: Default::default(),
            request_buffer: Default::default(),
            response_buffer: Default::default(),
        }
    }

    /// Tells the plugin process to initialize this VCPU.
    ///
    /// This should be called for each VCPU before the first run of any of the VCPUs in the VM.
    pub fn init(&self, vcpu: &Vcpu) -> SysResult<()> {
        let mut wait_reason = VcpuResponse_Wait::new();
        wait_reason.mut_init();
        self.wait_reason.set(Some(wait_reason));
        self.handle_until_resume(vcpu)?;
        Ok(())
    }

    /// The VCPU thread should call this before rerunning a VM in order to handle pending requests
    /// to this VCPU.
    pub fn pre_run(&self, vcpu: &Vcpu) -> SysResult<()> {
        let request = {
            let mut lock = self.per_vcpu_state.lock();
            lock.pause_request.take()
        };

        if let Some(user_data) = request {
            let mut wait_reason = VcpuResponse_Wait::new();
            wait_reason.mut_user().user = user_data;
            self.wait_reason.set(Some(wait_reason));
            self.handle_until_resume(vcpu)?;
        }
        Ok(())
    }

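    /// Routes an access at `addr` in `io_space` to the plugin process if the address falls
    /// inside a reserved range, returning true if the plugin handled the access.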
    fn process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool {
        let vcpu_state_lock = match self.shared_vcpu_state.read() {
            Ok(l) => l,
            Err(e) => {
                error!("error read locking shared cpu state: {}", e);
                return false;
            }
        };

        let first_before_addr = vcpu_state_lock.first_before(io_space, addr);

        match first_before_addr {
            Some(Range(start, len, async_write)) => {
                let offset = addr - start;
                if offset >= len {
                    return false;
                }
                if async_write && !data.is_write() {
                    return false;
                }

                let mut wait_reason = VcpuResponse_Wait::new();
                let io = wait_reason.mut_io();
                io.space = match io_space {
                    IoSpace::Ioport => AddressSpace::IOPORT,
                    IoSpace::Mmio => AddressSpace::MMIO,
                };
                io.address = addr;
                io.is_write = data.is_write();
                io.data = data.as_slice().to_vec();
                io.no_resume = async_write;
                if !async_write && vcpu_state_lock.matches_hint(io_space, addr, io.is_write) {
                    if let Ok(regs) = vcpu.get_regs() {
                        let (has_sregs, has_debugregs) = vcpu_state_lock.check_hint_details(&regs);
                        io.regs = VcpuRegs(regs).as_slice().to_vec();
                        if has_sregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS) {
                                io.sregs = state;
                            }
                        }
                        if has_debugregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::DEBUGREGS)
                            {
                                io.debugregs = state;
                            }
                        }
                    }
                }
                // Don't hold the lock while blocked in `handle_until_resume`.
                drop(vcpu_state_lock);

                if async_write {
                    let mut response = VcpuResponse::new();
                    response.set_wait(wait_reason);

                    let mut response_buffer = self.response_buffer.borrow_mut();
                    response_buffer.clear();
                    let mut stream = CodedOutputStream::vec(&mut response_buffer);
                    match response.write_length_delimited_to(&mut stream) {
                        Ok(_) => {
                            match stream.flush() {
                                Ok(_) => {}
                                Err(e) => error!("failed to flush to vec: {}", e),
                            }
                            let mut write_pipe = &self.write_pipe;
                            match write_pipe.write(&response_buffer[..]) {
                                Ok(_) => {}
                                Err(e) => error!("failed to write to pipe: {}", e),
                            }
                        }
                        Err(e) => error!("failed to write to buffer: {}", e),
                    }
                } else {
                    self.wait_reason.set(Some(wait_reason));
                    match self.handle_until_resume(vcpu) {
                        Ok(resume_data) => data.copy_from_slice(&resume_data),
                        Err(e) if e.errno() == EPIPE => {}
                        Err(e) => error!("failed to process vcpu requests: {}", e),
                    }
                }
                true
            }
            None => false,
        }
    }

    /// Has the plugin process handle an IO port read.
    pub fn io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle an IO port write.
    pub fn io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle a MMIO read.
    pub fn mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle a MMIO write.
    pub fn mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle a hyper-v call.
    pub fn hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_call();
        hv.input = input;
        hv.params0 = params[0];
        hv.params1 = params[1];

        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(resume_data) => {
                data.copy_from_slice(&resume_data);
                true
            }
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv call request: {}", e);
                false
            }
        }
    }

    /// Has the plugin process handle a synic config change.
    pub fn hyperv_synic(
        &self,
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
        vcpu: &Vcpu,
    ) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_synic();
        hv.msr = msr;
        hv.control = control;
        hv.evt_page = evt_page;
        hv.msg_page = msg_page;
        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(_resume_data) => true,
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv synic request: {}", e);
                false
            }
        }
    }

    fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
        let mut wait_reason = self.wait_reason.take();
        let mut do_recv = true;
        let mut resume_data = None;
        let mut response = VcpuResponse::new();
        let mut send_response = true;

        // Typically a response is sent for every request received.  The odd (yet common)
        // case is when a resume request is received.  This function will skip sending a
        // resume reply; instead we'll go run the VM and then later reply with a wait
        // response message.  This code block checks whether a wait reason is pending
        // (where the wait reason isn't the first-time init [first-time init needs to
        // first receive a wait request from the plugin]) so it can be sent as a reply
        // before doing a recv() for the next request.  Note that if a wait reply is
        // pending, this function will send the reply and do nothing else--the
        // expectation is that handle_until_resume() is the only caller of this function,
        // so the function will immediately be called again; this second call will no
        // longer see a pending wait reason and will do a recv() for the next message.
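        //
        // An illustrative exchange (a sketch of the protocol described above, not a
        // verbatim trace):
        //
        //   plugin -> vcpu: wait           vcpu -> plugin: wait(init)
        //   plugin -> vcpu: resume(data)   (no reply; the VM runs until the next vmexit)
        //   vmexit: a wait(io) reason is queued and sent before the next recv()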
        if let Some(reason) = wait_reason {
            if reason.has_init() {
                wait_reason = Some(reason);
            } else {
                response.set_wait(reason);
                do_recv = false;
                wait_reason = None;
            }
        }

        if do_recv {
            let mut request_buffer = self.request_buffer.borrow_mut();
            request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);

            let mut read_pipe = &self.read_pipe;
            let msg_size = read_pipe.read(&mut request_buffer).map_err(io_to_sys_err)?;

            let mut request: VcpuRequest =
                Message::parse_from_bytes(&request_buffer[..msg_size]).map_err(proto_to_sys_err)?;

            let res = if request.has_wait() {
                match wait_reason {
                    Some(wait_reason) => {
                        response.set_wait(wait_reason);
                        Ok(())
                    }
                    None => Err(SysError::new(EPROTO)),
                }
            } else if wait_reason.is_some() {
                // Any request other than getting the wait_reason while there is one pending is
                // invalid.
                self.wait_reason.set(wait_reason);
                Err(SysError::new(EPROTO))
            } else if request.has_resume() {
                send_response = false;
                let resume = request.get_resume();
                if !resume.get_regs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::REGS, resume.get_regs())?;
                }
                if !resume.get_sregs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS, resume.get_sregs())?;
                }
                if !resume.get_debugregs().is_empty() {
                    set_vcpu_state(
                        vcpu,
                        VcpuRequest_StateSet::DEBUGREGS,
                        resume.get_debugregs(),
                    )?;
                }
                resume_data = Some(request.take_resume().take_data());
                Ok(())
            } else if request.has_get_state() {
                let response_state = response.mut_get_state();
                match get_vcpu_state(vcpu, request.get_get_state().set) {
                    Ok(state) => {
                        response_state.state = state;
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_state() {
                response.mut_set_state();
                let set_state = request.get_set_state();
                set_vcpu_state(vcpu, set_state.set, set_state.get_state())
            } else if request.has_get_hyperv_cpuid() {
                let cpuid_response = &mut response.mut_get_hyperv_cpuid().entries;
                match vcpu.get_hyperv_cpuid() {
                    Ok(mut cpuid) => {
                        for entry in cpuid.mut_entries_slice() {
                            cpuid_response.push(cpuid_kvm_to_proto(entry));
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_get_msrs() {
                let entry_data = &mut response.mut_get_msrs().entry_data;
                let entry_indices = &request.get_get_msrs().entry_indices;
                let mut msr_entries = Vec::with_capacity(entry_indices.len());
                for &index in entry_indices {
                    msr_entries.push(kvm_msr_entry {
                        index,
                        ..Default::default()
                    });
                }
                match vcpu.get_msrs(&mut msr_entries) {
                    Ok(()) => {
                        for msr_entry in msr_entries {
                            entry_data.push(msr_entry.data);
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_msrs() {
                const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
                const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
                const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
                const_assert!(ALIGN_OF_MSRS >= mem::align_of::<kvm_msr_entry>());

                response.mut_set_msrs();
                let request_entries = &request.get_set_msrs().entries;

                let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
                let layout =
                    Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
                let mut allocation = LayoutAllocation::zeroed(layout);

                // Safe to obtain an exclusive reference because there are no other
                // references to the allocation yet and all-zero is a valid bit
                // pattern.
                let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };

                unsafe {
                    // Mapping the unsized array to a slice is unsafe because the length isn't
                    // known.  Providing the length used to create the struct guarantees the
                    // entire slice is valid.
                    let kvm_msr_entries: &mut [kvm_msr_entry] =
                        kvm_msrs.entries.as_mut_slice(request_entries.len());
                    for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
                        msr_entry.index = entry.index;
                        msr_entry.data = entry.data;
                    }
                }
                kvm_msrs.nmsrs = request_entries.len() as u32;
                vcpu.set_msrs(kvm_msrs)
            } else if request.has_set_cpuid() {
                response.mut_set_cpuid();
                let request_entries = &request.get_set_cpuid().entries;
                let mut cpuid = CpuId::new(request_entries.len());
                let cpuid_entries = cpuid.mut_entries_slice();
                for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
                    cpuid_entry.function = request_entry.function;
                    if request_entry.has_index {
                        cpuid_entry.index = request_entry.index;
                        cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                    }
                    cpuid_entry.eax = request_entry.eax;
                    cpuid_entry.ebx = request_entry.ebx;
                    cpuid_entry.ecx = request_entry.ecx;
                    cpuid_entry.edx = request_entry.edx;
                }
                vcpu.set_cpuid2(&cpuid)
            } else if request.has_enable_capability() {
                response.mut_enable_capability();
                let capability = request.get_enable_capability().capability;
                if capability != kvm_sys::KVM_CAP_HYPERV_SYNIC
                    && capability != kvm_sys::KVM_CAP_HYPERV_SYNIC2
                {
                    Err(SysError::new(EINVAL))
                } else {
                    let cap = kvm_enable_cap {
                        cap: capability,
                        ..Default::default()
                    };
                    // Safe because the allowed capabilities don't take pointer arguments.
                    unsafe { vcpu.kvm_enable_cap(&cap) }
                }
            } else if request.has_shutdown() {
                return Err(SysError::new(EPIPE));
            } else {
                Err(SysError::new(ENOTTY))
            };

            if let Err(e) = res {
                response.errno = e.errno();
            }
        }

        // Send the response, except if it's a resume response (in which case
        // we'll go run the VM and afterwards send a wait response message).
        if send_response {
            let mut response_buffer = self.response_buffer.borrow_mut();
            response_buffer.clear();
            let mut stream = CodedOutputStream::vec(&mut response_buffer);
            response
                .write_length_delimited_to(&mut stream)
                .map_err(proto_to_sys_err)?;
            stream.flush().map_err(proto_to_sys_err)?;
            let mut write_pipe = &self.write_pipe;
            write_pipe
                .write(&response_buffer[..])
                .map_err(io_to_sys_err)?;
        }

        Ok(resume_data)
    }

    fn handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>> {
        loop {
            if let Some(resume_data) = self.handle_request(vcpu)? {
                return Ok(resume_data);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shared_vcpu_reserve() {
        let mut shared_vcpu_state = SharedVcpuState::default();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0f, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x01, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x20, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x20, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x25, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x10, false)
            .unwrap();
    }
}