// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::alloc::Layout;
use std::cell::Cell;
use std::cell::RefCell;
use std::cmp;
use std::cmp::min;
use std::cmp::Ord;
use std::cmp::PartialEq;
use std::cmp::PartialOrd;
use std::collections::btree_set::BTreeSet;
use std::io::Read;
use std::io::Write;
use std::mem;
use std::sync::Arc;
use std::sync::RwLock;

use base::error;
use base::LayoutAllocation;
use data_model::DataInit;
use kvm::CpuId;
use kvm::Vcpu;
use kvm_sys::kvm_debugregs;
use kvm_sys::kvm_enable_cap;
use kvm_sys::kvm_fpu;
use kvm_sys::kvm_lapic_state;
use kvm_sys::kvm_mp_state;
use kvm_sys::kvm_msr_entry;
use kvm_sys::kvm_msrs;
use kvm_sys::kvm_regs;
use kvm_sys::kvm_sregs;
use kvm_sys::kvm_vcpu_events;
use kvm_sys::kvm_xcrs;
use kvm_sys::KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
use libc::EINVAL;
use libc::ENOENT;
use libc::ENOTTY;
use libc::EPERM;
use libc::EPIPE;
use libc::EPROTO;
use protobuf::CodedOutputStream;
use protobuf::Message;
use protos::plugin::*;
use static_assertions::const_assert;
use sync::Mutex;

use super::*;

/// Identifier for an address space in the VM.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum IoSpace {
    Ioport,
    Mmio,
}

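/// A reserved address range: `Range(start, length, async_write)`.
///
/// Note that equality and ordering consider only the start address, so a `Range` can be looked
/// up in a `BTreeSet` by its start alone (with dummy values for the other fields).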
#[derive(Debug, Copy, Clone)]
struct Range(u64, u64, bool);

impl Eq for Range {}

impl PartialEq for Range {
    fn eq(&self, other: &Range) -> bool {
        self.0 == other.0
    }
}

impl Ord for Range {
    fn cmp(&self, other: &Range) -> cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

impl PartialOrd for Range {
    fn partial_cmp(&self, other: &Range) -> Option<cmp::Ordering> {
        self.0.partial_cmp(&other.0)
    }
}

// Wrapper types to make the kvm register structs DataInit
#[derive(Copy, Clone)]
struct VcpuRegs(kvm_regs);
unsafe impl DataInit for VcpuRegs {}
#[derive(Copy, Clone)]
struct VcpuSregs(kvm_sregs);
unsafe impl DataInit for VcpuSregs {}
#[derive(Copy, Clone)]
struct VcpuFpu(kvm_fpu);
unsafe impl DataInit for VcpuFpu {}
#[derive(Copy, Clone)]
struct VcpuDebugregs(kvm_debugregs);
unsafe impl DataInit for VcpuDebugregs {}
#[derive(Copy, Clone)]
struct VcpuXcregs(kvm_xcrs);
unsafe impl DataInit for VcpuXcregs {}
#[derive(Copy, Clone)]
struct VcpuLapicState(kvm_lapic_state);
unsafe impl DataInit for VcpuLapicState {}
#[derive(Copy, Clone)]
struct VcpuMpState(kvm_mp_state);
unsafe impl DataInit for VcpuMpState {}
#[derive(Copy, Clone)]
struct VcpuEvents(kvm_vcpu_events);
unsafe impl DataInit for VcpuEvents {}
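// As an illustrative sketch (mirroring how these wrappers are used in `get_vcpu_state` and
// `set_vcpu_state` below), `DataInit` lets a register struct round-trip through a byte slice:
//
//     let bytes = VcpuRegs(vcpu.get_regs()?).as_slice().to_vec();
//     let regs = VcpuRegs::from_slice(&bytes).ok_or(SysError::new(EINVAL))?.0;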

fn get_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet) -> SysResult<Vec<u8>> {
    Ok(match state_set {
        VcpuRequest_StateSet::REGS => VcpuRegs(vcpu.get_regs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::SREGS => VcpuSregs(vcpu.get_sregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::FPU => VcpuFpu(vcpu.get_fpu()?).as_slice().to_vec(),
        VcpuRequest_StateSet::DEBUGREGS => VcpuDebugregs(vcpu.get_debugregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::XCREGS => VcpuXcregs(vcpu.get_xcrs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::LAPIC => VcpuLapicState(vcpu.get_lapic()?).as_slice().to_vec(),
        VcpuRequest_StateSet::MP => VcpuMpState(vcpu.get_mp_state()?).as_slice().to_vec(),
        VcpuRequest_StateSet::EVENTS => VcpuEvents(vcpu.get_vcpu_events()?).as_slice().to_vec(),
    })
}

fn set_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet, state: &[u8]) -> SysResult<()> {
    match state_set {
        VcpuRequest_StateSet::REGS => {
            vcpu.set_regs(&VcpuRegs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::SREGS => {
            vcpu.set_sregs(&VcpuSregs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::FPU => {
            vcpu.set_fpu(&VcpuFpu::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::DEBUGREGS => vcpu.set_debugregs(
            &VcpuDebugregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::XCREGS => vcpu.set_xcrs(
            &VcpuXcregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::LAPIC => vcpu.set_lapic(
            &VcpuLapicState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::MP => vcpu.set_mp_state(
            &VcpuMpState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::EVENTS => vcpu.set_vcpu_events(
            &VcpuEvents::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
    }
}

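/// Register-matching criteria for a call hint.
///
/// Each `match_*` flag enables comparison of the corresponding guest register against the given
/// value; `send_sregs` and `send_debugregs` select which additional state is sent to the plugin
/// when the entry matches (see `SharedVcpuState::check_hint_details`).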
pub struct CallHintDetails {
    pub match_rax: bool,
    pub match_rbx: bool,
    pub match_rcx: bool,
    pub match_rdx: bool,
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub send_sregs: bool,
    pub send_debugregs: bool,
}

pub struct CallHint {
    io_space: IoSpace,
    addr: u64,
    on_write: bool,
    regs: Vec<CallHintDetails>,
}

/// State shared by every VCPU, grouped together to make edits to the state coherent across VCPUs.
#[derive(Default)]
pub struct SharedVcpuState {
    ioport_regions: BTreeSet<Range>,
    mmio_regions: BTreeSet<Range>,
    hint: Option<CallHint>,
}

impl SharedVcpuState {
    /// Reserves the given range for handling by the plugin process.
    ///
    /// This will reject any reservation that overlaps with an existing reservation.
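    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative, not from the original source):
    ///
    /// ```ignore
    /// let mut state = SharedVcpuState::default();
    /// // Reserve the 8-byte serial range at 0x3f8; an overlapping reservation fails.
    /// state.reserve_range(IoSpace::Ioport, 0x3f8, 8, false).unwrap();
    /// assert!(state.reserve_range(IoSpace::Ioport, 0x3f8, 1, false).is_err());
    /// state.unreserve_range(IoSpace::Ioport, 0x3f8).unwrap();
    /// ```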
    pub fn reserve_range(
        &mut self,
        space: IoSpace,
        start: u64,
        length: u64,
        async_write: bool,
    ) -> SysResult<()> {
        if length == 0 {
            return Err(SysError::new(EINVAL));
        }

        // Reject all cases where this reservation is part of another reservation.
        if self.is_reserved(space, start) {
            return Err(SysError::new(EPERM));
        }

        let last_address = match start.checked_add(length) {
            Some(end) => end - 1,
            None => return Err(SysError::new(EINVAL)),
        };

        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };

        match space
            .range(..Range(last_address, 0, false))
            .next_back()
            .cloned()
        {
            Some(Range(existing_start, _, _)) if existing_start >= start => {
                Err(SysError::new(EPERM))
            }
            _ => {
                space.insert(Range(start, length, async_write));
                Ok(())
            }
        }
    }

    /// Releases a reservation previously made at `start` in the given `space`.
    pub fn unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()> {
        let range = Range(start, 0, false);
        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };
        if space.remove(&range) {
            Ok(())
        } else {
            Err(SysError::new(ENOENT))
        }
    }

    pub fn set_hint(
        &mut self,
        space: IoSpace,
        addr: u64,
        on_write: bool,
        regs: Vec<CallHintDetails>,
    ) {
        if addr == 0 {
            self.hint = None;
        } else {
            let hint = CallHint {
                io_space: space,
                addr,
                on_write,
                regs,
            };
            self.hint = Some(hint);
        }
    }

    fn is_reserved(&self, space: IoSpace, addr: u64) -> bool {
        if let Some(Range(start, len, _)) = self.first_before(space, addr) {
            let offset = addr - start;
            if offset < len {
                return true;
            }
        }
        false
    }

    fn first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range> {
        let space = match io_space {
            IoSpace::Ioport => &self.ioport_regions,
            IoSpace::Mmio => &self.mmio_regions,
        };

        match addr.checked_add(1) {
            Some(next_addr) => space
                .range(..Range(next_addr, 0, false))
                .next_back()
                .cloned(),
            None => None,
        }
    }

    fn matches_hint(&self, io_space: IoSpace, addr: u64, is_write: bool) -> bool {
        if let Some(hint) = &self.hint {
            return io_space == hint.io_space && addr == hint.addr && is_write == hint.on_write;
        }
        false
    }

    fn check_hint_details(&self, regs: &kvm_regs) -> (bool, bool) {
        if let Some(hint) = &self.hint {
            for entry in hint.regs.iter() {
                if (!entry.match_rax || entry.rax == regs.rax)
                    && (!entry.match_rbx || entry.rbx == regs.rbx)
                    && (!entry.match_rcx || entry.rcx == regs.rcx)
                    && (!entry.match_rdx || entry.rdx == regs.rdx)
                {
                    return (entry.send_sregs, entry.send_debugregs);
                }
            }
        }
        (false, false)
    }
}

/// State specific to a VCPU, grouped so that each `PluginVcpu` object will share a canonical
/// version.
#[derive(Default)]
pub struct PerVcpuState {
    pause_request: Option<u64>,
}

impl PerVcpuState {
    /// Indicates that a VCPU should wait until the plugin process resumes the VCPU.
    ///
    /// This method will not cause a VCPU to pause immediately. Instead, the VCPU thread will
    /// continue running until it is interrupted, at which point it will check for a pending pause.
    /// If there is another call to `request_pause` for this VCPU before that happens, the last
    /// pause request's `data` will be overwritten with the most recent `data`.
    ///
    /// To get an immediate pause after calling `request_pause`, send a signal (with a registered
    /// handler) to the thread handling the VCPU corresponding to this state. This should interrupt
    /// the running VCPU, which should check for a pause with `PluginVcpu::pre_run`.
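    ///
    /// A minimal usage sketch (illustrative, not from the original source):
    ///
    /// ```ignore
    /// per_vcpu_state.lock().request_pause(0);
    /// // Now signal the VCPU thread so it exits the VM and reaches `PluginVcpu::pre_run`.
    /// ```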
    pub fn request_pause(&mut self, data: u64) {
        self.pause_request = Some(data);
    }
}

enum VcpuRunData<'a> {
    Read(&'a mut [u8]),
    Write(&'a [u8]),
}

impl<'a> VcpuRunData<'a> {
    fn is_write(&self) -> bool {
        matches!(self, VcpuRunData::Write(_))
    }

    fn as_slice(&self) -> &[u8] {
        match self {
            VcpuRunData::Read(s) => s,
            VcpuRunData::Write(s) => s,
        }
    }

    fn copy_from_slice(&mut self, data: &[u8]) {
        if let VcpuRunData::Read(s) = self {
            let copy_size = min(s.len(), data.len());
            // Copy only the overlapping prefix; `copy_from_slice` panics unless the source
            // and destination lengths are equal.
            s[..copy_size].copy_from_slice(&data[..copy_size]);
        }
    }
}

/// State object for a VCPU's connection with the plugin process.
///
/// This is used by a VCPU thread to allow the plugin process to handle vmexits. Each method may
/// block indefinitely while the plugin process is handling requests. In order to shut down cleanly
/// during these blocking calls, the `connection` socket should be shut down, which will end the
/// blocking calls.
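///
/// A hedged sketch of the expected call sequence from a VCPU thread (illustrative; the run loop
/// and vmexit dispatch live outside this module):
///
/// ```ignore
/// plugin_vcpu.init(&vcpu)?; // once per VCPU, before any VCPU in the VM runs
/// loop {
///     plugin_vcpu.pre_run(&vcpu)?; // handle any pending pause request
///     // Run the VCPU, then route IO/MMIO vmexits to io_read/io_write/mmio_read/mmio_write.
/// }
/// ```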
pub struct PluginVcpu {
    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
    per_vcpu_state: Arc<Mutex<PerVcpuState>>,
    read_pipe: File,
    write_pipe: File,
    wait_reason: Cell<Option<VcpuResponse_Wait>>,
    request_buffer: RefCell<Vec<u8>>,
    response_buffer: RefCell<Vec<u8>>,
}

impl PluginVcpu {
    /// Creates the plugin state and connection container for a VCPU thread.
    pub fn new(
        shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
        per_vcpu_state: Arc<Mutex<PerVcpuState>>,
        read_pipe: File,
        write_pipe: File,
    ) -> PluginVcpu {
        PluginVcpu {
            shared_vcpu_state,
            per_vcpu_state,
            read_pipe,
            write_pipe,
            wait_reason: Default::default(),
            request_buffer: Default::default(),
            response_buffer: Default::default(),
        }
    }

    /// Tells the plugin process to initialize this VCPU.
    ///
    /// This should be called for each VCPU before the first run of any of the VCPUs in the VM.
    pub fn init(&self, vcpu: &Vcpu) -> SysResult<()> {
        let mut wait_reason = VcpuResponse_Wait::new();
        wait_reason.mut_init();
        self.wait_reason.set(Some(wait_reason));
        self.handle_until_resume(vcpu)?;
        Ok(())
    }

    /// The VCPU thread should call this before rerunning a VM in order to handle pending requests
    /// to this VCPU.
    pub fn pre_run(&self, vcpu: &Vcpu) -> SysResult<()> {
        let request = {
            let mut lock = self.per_vcpu_state.lock();
            lock.pause_request.take()
        };

        if let Some(user_data) = request {
            let mut wait_reason = VcpuResponse_Wait::new();
            wait_reason.mut_user().user = user_data;
            self.wait_reason.set(Some(wait_reason));
            self.handle_until_resume(vcpu)?;
        }
        Ok(())
    }

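    /// Routes an access at `addr` in `io_space` to the plugin process if the address falls
    /// inside a reserved range, returning true if the plugin handled the access.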
    fn process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool {
        let vcpu_state_lock = match self.shared_vcpu_state.read() {
            Ok(l) => l,
            Err(e) => {
                error!("error read locking shared cpu state: {}", e);
                return false;
            }
        };

        let first_before_addr = vcpu_state_lock.first_before(io_space, addr);

        match first_before_addr {
            Some(Range(start, len, async_write)) => {
                let offset = addr - start;
                if offset >= len {
                    return false;
                }
                if async_write && !data.is_write() {
                    return false;
                }

                let mut wait_reason = VcpuResponse_Wait::new();
                let io = wait_reason.mut_io();
                io.space = match io_space {
                    IoSpace::Ioport => AddressSpace::IOPORT,
                    IoSpace::Mmio => AddressSpace::MMIO,
                };
                io.address = addr;
                io.is_write = data.is_write();
                io.data = data.as_slice().to_vec();
                io.no_resume = async_write;
                if !async_write && vcpu_state_lock.matches_hint(io_space, addr, io.is_write) {
                    if let Ok(regs) = vcpu.get_regs() {
                        let (has_sregs, has_debugregs) = vcpu_state_lock.check_hint_details(&regs);
                        io.regs = VcpuRegs(regs).as_slice().to_vec();
                        if has_sregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS) {
                                io.sregs = state;
                            }
                        }
                        if has_debugregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::DEBUGREGS)
                            {
                                io.debugregs = state;
                            }
                        }
                    }
                }
                // Don't hold the lock while blocked in `handle_until_resume`.
                drop(vcpu_state_lock);

                if async_write {
                    let mut response = VcpuResponse::new();
                    response.set_wait(wait_reason);

                    let mut response_buffer = self.response_buffer.borrow_mut();
                    response_buffer.clear();
                    let mut stream = CodedOutputStream::vec(&mut response_buffer);
                    match response.write_length_delimited_to(&mut stream) {
                        Ok(_) => {
                            match stream.flush() {
                                Ok(_) => {}
                                Err(e) => error!("failed to flush to vec: {}", e),
                            }
                            let mut write_pipe = &self.write_pipe;
                            match write_pipe.write(&response_buffer[..]) {
                                Ok(_) => {}
                                Err(e) => error!("failed to write to pipe: {}", e),
                            }
                        }
                        Err(e) => error!("failed to write to buffer: {}", e),
                    }
                } else {
                    self.wait_reason.set(Some(wait_reason));
                    match self.handle_until_resume(vcpu) {
                        Ok(resume_data) => data.copy_from_slice(&resume_data),
                        Err(e) if e.errno() == EPIPE => {}
                        Err(e) => error!("failed to process vcpu requests: {}", e),
                    }
                }
                true
            }
            None => false,
        }
    }

    /// Has the plugin process handle an IO port read.
    pub fn io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle an IO port write.
    pub fn io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle a MMIO read.
    pub fn mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle a MMIO write.
    pub fn mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle a hyper-v call.
    pub fn hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_call();
        hv.input = input;
        hv.params0 = params[0];
        hv.params1 = params[1];

        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(resume_data) => {
                data.copy_from_slice(&resume_data);
                true
            }
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv call request: {}", e);
                false
            }
        }
    }

    /// Has the plugin process handle a synic config change.
    pub fn hyperv_synic(
        &self,
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
        vcpu: &Vcpu,
    ) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_synic();
        hv.msr = msr;
        hv.control = control;
        hv.evt_page = evt_page;
        hv.msg_page = msg_page;
        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(_resume_data) => true,
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv synic request: {}", e);
                false
            }
        }
    }

    fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
        let mut wait_reason = self.wait_reason.take();
        let mut do_recv = true;
        let mut resume_data = None;
        let mut response = VcpuResponse::new();
        let mut send_response = true;

        // Typically a response is sent for every request received.  The odd (yet common)
        // case is when a resume request is received.  This function will skip sending a
        // resume reply; instead we'll go run the VM and then later reply with a wait
        // response message.  This code block checks whether a wait reason is pending
        // (where the wait reason isn't the first-time init [first-time init needs to
        // first receive a wait request from the plugin]) so it can be sent as a reply
        // before doing a recv() for the next request.  Note that if a wait reply is
        // pending, this function will send the reply and do nothing else--the
        // expectation is that handle_until_resume() is the only caller of this function,
        // so the function will immediately be called again; this second call will no
        // longer see a pending wait reason and will do a recv() for the next message.
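        //
        // An illustrative exchange (a sketch of the protocol described above, not a
        // verbatim trace):
        //
        //   plugin -> vcpu: wait           vcpu -> plugin: wait(init)
        //   plugin -> vcpu: resume(data)   (no reply; the VM runs until the next vmexit)
        //   vmexit: a wait(io) reason is queued and sent before the next recv()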
        if let Some(reason) = wait_reason {
            if reason.has_init() {
                wait_reason = Some(reason);
            } else {
                response.set_wait(reason);
                do_recv = false;
                wait_reason = None;
            }
        }

        if do_recv {
            let mut request_buffer = self.request_buffer.borrow_mut();
            request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);

            let mut read_pipe = &self.read_pipe;
            let msg_size = read_pipe.read(&mut request_buffer).map_err(io_to_sys_err)?;

            let mut request: VcpuRequest =
                Message::parse_from_bytes(&request_buffer[..msg_size]).map_err(proto_to_sys_err)?;

            let res = if request.has_wait() {
                match wait_reason {
                    Some(wait_reason) => {
                        response.set_wait(wait_reason);
                        Ok(())
                    }
                    None => Err(SysError::new(EPROTO)),
                }
            } else if wait_reason.is_some() {
                // Any request other than getting the wait_reason while there is one pending is
                // invalid.
                self.wait_reason.set(wait_reason);
                Err(SysError::new(EPROTO))
            } else if request.has_resume() {
                send_response = false;
                let resume = request.get_resume();
                if !resume.get_regs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::REGS, resume.get_regs())?;
                }
                if !resume.get_sregs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS, resume.get_sregs())?;
                }
                if !resume.get_debugregs().is_empty() {
                    set_vcpu_state(
                        vcpu,
                        VcpuRequest_StateSet::DEBUGREGS,
                        resume.get_debugregs(),
                    )?;
                }
                resume_data = Some(request.take_resume().take_data());
                Ok(())
            } else if request.has_get_state() {
                let response_state = response.mut_get_state();
                match get_vcpu_state(vcpu, request.get_get_state().set) {
                    Ok(state) => {
                        response_state.state = state;
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_state() {
                response.mut_set_state();
                let set_state = request.get_set_state();
                set_vcpu_state(vcpu, set_state.set, set_state.get_state())
            } else if request.has_get_hyperv_cpuid() {
                let cpuid_response = &mut response.mut_get_hyperv_cpuid().entries;
                match vcpu.get_hyperv_cpuid() {
                    Ok(mut cpuid) => {
                        for entry in cpuid.mut_entries_slice() {
                            cpuid_response.push(cpuid_kvm_to_proto(entry));
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_get_msrs() {
                let entry_data = &mut response.mut_get_msrs().entry_data;
                let entry_indices = &request.get_get_msrs().entry_indices;
                let mut msr_entries = Vec::with_capacity(entry_indices.len());
                for &index in entry_indices {
                    msr_entries.push(kvm_msr_entry {
                        index,
                        ..Default::default()
                    });
                }
                match vcpu.get_msrs(&mut msr_entries) {
                    Ok(()) => {
                        for msr_entry in msr_entries {
                            entry_data.push(msr_entry.data);
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_msrs() {
                const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
                const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
                const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
                const_assert!(ALIGN_OF_MSRS >= mem::align_of::<kvm_msr_entry>());

                response.mut_set_msrs();
                let request_entries = &request.get_set_msrs().entries;

                let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
                let layout =
                    Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
                let mut allocation = LayoutAllocation::zeroed(layout);

                // Safe to obtain an exclusive reference because there are no other
                // references to the allocation yet and all-zero is a valid bit
                // pattern.
                let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };

                unsafe {
                    // Mapping the unsized array to a slice is unsafe because the length isn't
                    // known.  Providing the length used to create the struct guarantees the
                    // entire slice is valid.
                    let kvm_msr_entries: &mut [kvm_msr_entry] =
                        kvm_msrs.entries.as_mut_slice(request_entries.len());
                    for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
                        msr_entry.index = entry.index;
                        msr_entry.data = entry.data;
                    }
                }
                kvm_msrs.nmsrs = request_entries.len() as u32;
                vcpu.set_msrs(kvm_msrs)
            } else if request.has_set_cpuid() {
                response.mut_set_cpuid();
                let request_entries = &request.get_set_cpuid().entries;
                let mut cpuid = CpuId::new(request_entries.len());
                let cpuid_entries = cpuid.mut_entries_slice();
                for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
                    cpuid_entry.function = request_entry.function;
                    if request_entry.has_index {
                        cpuid_entry.index = request_entry.index;
                        cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                    }
                    cpuid_entry.eax = request_entry.eax;
                    cpuid_entry.ebx = request_entry.ebx;
                    cpuid_entry.ecx = request_entry.ecx;
                    cpuid_entry.edx = request_entry.edx;
                }
                vcpu.set_cpuid2(&cpuid)
            } else if request.has_enable_capability() {
                response.mut_enable_capability();
                let capability = request.get_enable_capability().capability;
                if capability != kvm_sys::KVM_CAP_HYPERV_SYNIC
                    && capability != kvm_sys::KVM_CAP_HYPERV_SYNIC2
                {
                    Err(SysError::new(EINVAL))
                } else {
                    let cap = kvm_enable_cap {
                        cap: capability,
                        ..Default::default()
                    };
                    // Safe because the allowed capabilities don't take pointer arguments.
                    unsafe { vcpu.kvm_enable_cap(&cap) }
                }
            } else if request.has_shutdown() {
                return Err(SysError::new(EPIPE));
            } else {
                Err(SysError::new(ENOTTY))
            };

            if let Err(e) = res {
                response.errno = e.errno();
            }
        }

        // Send the response, except if it's a resume response (in which case
        // we'll go run the VM and afterwards send a wait response message).
        if send_response {
            let mut response_buffer = self.response_buffer.borrow_mut();
            response_buffer.clear();
            let mut stream = CodedOutputStream::vec(&mut response_buffer);
            response
                .write_length_delimited_to(&mut stream)
                .map_err(proto_to_sys_err)?;
            stream.flush().map_err(proto_to_sys_err)?;
            let mut write_pipe = &self.write_pipe;
            write_pipe
                .write(&response_buffer[..])
                .map_err(io_to_sys_err)?;
        }

        Ok(resume_data)
    }

    fn handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>> {
        loop {
            if let Some(resume_data) = self.handle_request(vcpu)? {
                return Ok(resume_data);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shared_vcpu_reserve() {
        let mut shared_vcpu_state = SharedVcpuState::default();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0f, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x01, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x20, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x20, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x25, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x10, false)
            .unwrap();
    }
}