• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::sync::Arc;
6 
7 use anyhow::anyhow;
8 use anyhow::Context;
9 use base::error;
10 #[cfg(not(test))]
11 use base::Clock;
12 use base::Error;
13 use base::Event;
14 #[cfg(test)]
15 use base::FakeClock as Clock;
16 use base::Result;
17 use base::Tube;
18 use hypervisor::kvm::KvmVcpu;
19 use hypervisor::kvm::KvmVm;
20 use hypervisor::HypervisorCap;
21 use hypervisor::IoapicState;
22 use hypervisor::IrqRoute;
23 use hypervisor::IrqSource;
24 use hypervisor::IrqSourceChip;
25 use hypervisor::LapicState;
26 use hypervisor::MPState;
27 use hypervisor::PicSelect;
28 use hypervisor::PicState;
29 use hypervisor::PitState;
30 use hypervisor::Vcpu;
31 use hypervisor::VcpuX86_64;
32 use hypervisor::Vm;
33 use hypervisor::VmX86_64;
34 use kvm_sys::*;
35 use resources::SystemAllocator;
36 use serde::Deserialize;
37 use serde::Serialize;
38 use sync::Mutex;
39 
40 use crate::irqchip::Ioapic;
41 use crate::irqchip::IrqEvent;
42 use crate::irqchip::IrqEventIndex;
43 use crate::irqchip::Pic;
44 use crate::irqchip::VcpuRunState;
45 use crate::irqchip::IOAPIC_BASE_ADDRESS;
46 use crate::irqchip::IOAPIC_MEM_LENGTH_BYTES;
47 use crate::Bus;
48 use crate::IrqChip;
49 use crate::IrqChipCap;
50 use crate::IrqChipX86_64;
51 use crate::IrqEdgeEvent;
52 use crate::IrqEventSource;
53 use crate::IrqLevelEvent;
54 use crate::Pit;
55 use crate::PitError;
56 
/// PIT channel 0 timer is connected to IRQ 0
58 const PIT_CHANNEL0_IRQ: u32 = 0;
59 
60 /// Default x86 routing table.  Pins 0-7 go to primary pic and ioapic, pins 8-15 go to secondary
61 /// pic and ioapic, and pins 16-23 go only to the ioapic.
kvm_default_irq_routing_table(ioapic_pins: usize) -> Vec<IrqRoute>62 fn kvm_default_irq_routing_table(ioapic_pins: usize) -> Vec<IrqRoute> {
63     let mut routes: Vec<IrqRoute> = Vec::new();
64 
65     for i in 0..8 {
66         routes.push(IrqRoute::pic_irq_route(IrqSourceChip::PicPrimary, i));
67         routes.push(IrqRoute::ioapic_irq_route(i));
68     }
69     for i in 8..16 {
70         routes.push(IrqRoute::pic_irq_route(IrqSourceChip::PicSecondary, i));
71         routes.push(IrqRoute::ioapic_irq_route(i));
72     }
73     for i in 16..ioapic_pins as u32 {
74         routes.push(IrqRoute::ioapic_irq_route(i));
75     }
76 
77     routes
78 }
79 
80 /// IrqChip implementation where the entire IrqChip is emulated by KVM.
81 ///
82 /// This implementation will use the KVM API to create and configure the in-kernel irqchip.
83 pub struct KvmKernelIrqChip {
84     pub(super) vm: KvmVm,
85     pub(super) vcpus: Arc<Mutex<Vec<Option<KvmVcpu>>>>,
86     pub(super) routes: Arc<Mutex<Vec<IrqRoute>>>,
87 }
88 
89 #[derive(Serialize, Deserialize)]
90 struct KvmKernelIrqChipSnapshot {
91     routes: Vec<IrqRoute>,
92     // apic_base and interrupt_bitmap are part of the IrqChip, despite the
93     // fact that we get the values from the Vcpu ioctl "KVM_GET_SREGS".
94     // Contains 1 entry per Vcpu.
95     apic_base: Vec<u64>,
96     interrupt_bitmap: Vec<[u64; 4usize]>,
97 }
98 
99 impl KvmKernelIrqChip {
100     /// Construct a new KvmKernelIrqchip.
new(vm: KvmVm, num_vcpus: usize) -> Result<KvmKernelIrqChip>101     pub fn new(vm: KvmVm, num_vcpus: usize) -> Result<KvmKernelIrqChip> {
102         vm.create_irq_chip()?;
103         vm.create_pit()?;
104         let ioapic_pins = vm.get_ioapic_num_pins()?;
105 
106         Ok(KvmKernelIrqChip {
107             vm,
108             vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())),
109             routes: Arc::new(Mutex::new(kvm_default_irq_routing_table(ioapic_pins))),
110         })
111     }
112     /// Attempt to create a shallow clone of this x86_64 KvmKernelIrqChip instance.
arch_try_clone(&self) -> Result<Self>113     pub(super) fn arch_try_clone(&self) -> Result<Self> {
114         Ok(KvmKernelIrqChip {
115             vm: self.vm.try_clone()?,
116             vcpus: self.vcpus.clone(),
117             routes: self.routes.clone(),
118         })
119     }
120 }
121 
122 impl IrqChipX86_64 for KvmKernelIrqChip {
try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>>123     fn try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>> {
124         Ok(Box::new(self.try_clone()?))
125     }
126 
as_irq_chip(&self) -> &dyn IrqChip127     fn as_irq_chip(&self) -> &dyn IrqChip {
128         self
129     }
130 
as_irq_chip_mut(&mut self) -> &mut dyn IrqChip131     fn as_irq_chip_mut(&mut self) -> &mut dyn IrqChip {
132         self
133     }
134 
135     /// Get the current state of the PIC
get_pic_state(&self, select: PicSelect) -> Result<PicState>136     fn get_pic_state(&self, select: PicSelect) -> Result<PicState> {
137         Ok(PicState::from(&self.vm.get_pic_state(select)?))
138     }
139 
140     /// Set the current state of the PIC
set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()>141     fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> {
142         self.vm.set_pic_state(select, &kvm_pic_state::from(state))
143     }
144 
145     /// Get the current state of the IOAPIC
get_ioapic_state(&self) -> Result<IoapicState>146     fn get_ioapic_state(&self) -> Result<IoapicState> {
147         Ok(IoapicState::from(&self.vm.get_ioapic_state()?))
148     }
149 
150     /// Set the current state of the IOAPIC
set_ioapic_state(&mut self, state: &IoapicState) -> Result<()>151     fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> {
152         self.vm.set_ioapic_state(&kvm_ioapic_state::from(state))
153     }
154 
155     /// Get the current state of the specified VCPU's local APIC
get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState>156     fn get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
157         match self.vcpus.lock().get(vcpu_id) {
158             Some(Some(vcpu)) => Ok(LapicState::from(&vcpu.get_lapic()?)),
159             _ => Err(Error::new(libc::ENOENT)),
160         }
161     }
162 
163     /// Set the current state of the specified VCPU's local APIC
set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()>164     fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
165         match self.vcpus.lock().get(vcpu_id) {
166             Some(Some(vcpu)) => vcpu.set_lapic(&kvm_lapic_state::from(state)),
167             _ => Err(Error::new(libc::ENOENT)),
168         }
169     }
170 
171     /// Get the lapic frequency in Hz
lapic_frequency(&self) -> u32172     fn lapic_frequency(&self) -> u32 {
173         // KVM emulates the lapic to have a bus frequency of 1GHz
174         1_000_000_000
175     }
176 
177     /// Retrieves the state of the PIT. Gets the pit state via the KVM API.
get_pit(&self) -> Result<PitState>178     fn get_pit(&self) -> Result<PitState> {
179         Ok(PitState::from(&self.vm.get_pit_state()?))
180     }
181 
182     /// Sets the state of the PIT. Sets the pit state via the KVM API.
set_pit(&mut self, state: &PitState) -> Result<()>183     fn set_pit(&mut self, state: &PitState) -> Result<()> {
184         self.vm.set_pit_state(&kvm_pit_state2::from(state))
185     }
186 
187     /// Returns true if the PIT uses port 0x61 for the PC speaker, false if 0x61 is unused.
188     /// KVM's kernel PIT doesn't use 0x61.
pit_uses_speaker_port(&self) -> bool189     fn pit_uses_speaker_port(&self) -> bool {
190         false
191     }
192 
snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value>193     fn snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value> {
194         let mut apics: Vec<u64> = Vec::new();
195         let mut interrupt_bitmaps: Vec<[u64; 4usize]> = Vec::new();
196         {
197             let vcpus_lock = self.vcpus.lock();
198             for vcpu in (*vcpus_lock).iter().flatten() {
199                 apics.push(vcpu.get_apic_base()?);
200                 interrupt_bitmaps.push(vcpu.get_interrupt_bitmap()?);
201             }
202         }
203         serde_json::to_value(KvmKernelIrqChipSnapshot {
204             routes: self.routes.lock().clone(),
205             apic_base: apics,
206             interrupt_bitmap: interrupt_bitmaps,
207         })
208         .context("failed to serialize KvmKernelIrqChip")
209     }
210 
restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()>211     fn restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
212         let deser: KvmKernelIrqChipSnapshot =
213             serde_json::from_value(data).context("failed to deserialize data")?;
214         self.set_irq_routes(&deser.routes)?;
215         let vcpus_lock = self.vcpus.lock();
216         assert_eq!(deser.interrupt_bitmap.len(), vcpus_lock.len());
217         assert_eq!(deser.apic_base.len(), vcpus_lock.len());
218         for (i, vcpu) in vcpus_lock.iter().enumerate() {
219             if let Some(vcpu) = vcpu {
220                 vcpu.set_apic_base(*deser.apic_base.get(i).unwrap())?;
221                 vcpu.set_interrupt_bitmap(*deser.interrupt_bitmap.get(i).unwrap())?;
222             } else {
223                 return Err(anyhow!(
224                     "Received None instead of Vcpu while restoring apic_base and interrupt_bitmap"
225                 ));
226             }
227         }
228         Ok(())
229     }
230 }
231 
232 /// The KvmSplitIrqsChip supports KVM's SPLIT_IRQCHIP feature, where the PIC and IOAPIC
233 /// are emulated in userspace, while the local APICs are emulated in the kernel.
234 /// The SPLIT_IRQCHIP feature only supports x86/x86_64 so we only define this IrqChip in crosvm
235 /// for x86/x86_64.
236 pub struct KvmSplitIrqChip {
237     vm: KvmVm,
238     vcpus: Arc<Mutex<Vec<Option<KvmVcpu>>>>,
239     routes: Arc<Mutex<Vec<IrqRoute>>>,
240     pit: Arc<Mutex<Pit>>,
241     pic: Arc<Mutex<Pic>>,
242     ioapic: Arc<Mutex<Ioapic>>,
243     ioapic_pins: usize,
244     /// Vec of ioapic irq events that have been delayed because the ioapic was locked when
245     /// service_irq was called on the irqchip. This prevents deadlocks when a Vcpu thread has
246     /// locked the ioapic and the ioapic sends a AddMsiRoute signal to the main thread (which
247     /// itself may be busy trying to call service_irq).
248     delayed_ioapic_irq_events: Arc<Mutex<Vec<usize>>>,
249     /// Event which is meant to trigger process of any irqs events that were delayed.
250     delayed_ioapic_irq_trigger: Event,
251     /// Array of Events that devices will use to assert ioapic pins.
252     irq_events: Arc<Mutex<Vec<Option<IrqEvent>>>>,
253 }
254 
kvm_dummy_msi_routes(ioapic_pins: usize) -> Vec<IrqRoute>255 fn kvm_dummy_msi_routes(ioapic_pins: usize) -> Vec<IrqRoute> {
256     let mut routes: Vec<IrqRoute> = Vec::new();
257     for i in 0..ioapic_pins {
258         routes.push(
259             // Add dummy MSI routes to replace the default IRQChip routes.
260             IrqRoute {
261                 gsi: i as u32,
262                 source: IrqSource::Msi {
263                     address: 0,
264                     data: 0,
265                 },
266             },
267         );
268     }
269     routes
270 }
271 
272 impl KvmSplitIrqChip {
273     /// Construct a new KvmSplitIrqChip.
new( vm: KvmVm, num_vcpus: usize, irq_tube: Tube, ioapic_pins: Option<usize>, ) -> Result<Self>274     pub fn new(
275         vm: KvmVm,
276         num_vcpus: usize,
277         irq_tube: Tube,
278         ioapic_pins: Option<usize>,
279     ) -> Result<Self> {
280         let ioapic_pins = ioapic_pins.unwrap_or(vm.get_ioapic_num_pins()?);
281         vm.enable_split_irqchip(ioapic_pins)?;
282         let pit_evt = IrqEdgeEvent::new()?;
283         let pit = Pit::new(pit_evt.try_clone()?, Arc::new(Mutex::new(Clock::new()))).map_err(
284             |e| match e {
285                 PitError::CloneEvent(err) => err,
286                 PitError::CreateEvent(err) => err,
287                 PitError::CreateWaitContext(err) => err,
288                 PitError::WaitError(err) => err,
289                 PitError::TimerCreateError(err) => err,
290                 PitError::SpawnThread(_) => Error::new(libc::EIO),
291             },
292         )?;
293 
294         let pit_event_source = IrqEventSource::from_device(&pit);
295 
296         let mut chip = KvmSplitIrqChip {
297             vm,
298             vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())),
299             routes: Arc::new(Mutex::new(Vec::new())),
300             pit: Arc::new(Mutex::new(pit)),
301             pic: Arc::new(Mutex::new(Pic::new())),
302             ioapic: Arc::new(Mutex::new(Ioapic::new(irq_tube, ioapic_pins)?)),
303             ioapic_pins,
304             delayed_ioapic_irq_events: Arc::new(Mutex::new(Vec::new())),
305             delayed_ioapic_irq_trigger: Event::new()?,
306             irq_events: Arc::new(Mutex::new(Default::default())),
307         };
308 
309         // Setup standard x86 irq routes
310         let mut routes = kvm_default_irq_routing_table(ioapic_pins);
311         // Add dummy MSI routes for the first ioapic_pins GSIs
312         routes.append(&mut kvm_dummy_msi_routes(ioapic_pins));
313 
314         // Set the routes so they get sent to KVM
315         chip.set_irq_routes(&routes)?;
316 
317         chip.register_edge_irq_event(PIT_CHANNEL0_IRQ, &pit_evt, pit_event_source)?;
318         Ok(chip)
319     }
320 }
321 
322 impl KvmSplitIrqChip {
323     /// Convenience function for determining which chips the supplied irq routes to.
routes_to_chips(&self, irq: u32) -> Vec<(IrqSourceChip, u32)>324     fn routes_to_chips(&self, irq: u32) -> Vec<(IrqSourceChip, u32)> {
325         let mut chips = Vec::new();
326         for route in self.routes.lock().iter() {
327             match route {
328                 IrqRoute {
329                     gsi,
330                     source: IrqSource::Irqchip { chip, pin },
331                 } if *gsi == irq => match chip {
332                     IrqSourceChip::PicPrimary
333                     | IrqSourceChip::PicSecondary
334                     | IrqSourceChip::Ioapic => chips.push((*chip, *pin)),
335                     IrqSourceChip::Gic => {
336                         error!("gic irq should not be possible on a KvmSplitIrqChip")
337                     }
338                     IrqSourceChip::Aia => {
339                         error!("Aia irq should not be possible on x86_64")
340                     }
341                 },
342                 // Ignore MSIs and other routes
343                 _ => {}
344             }
345         }
346         chips
347     }
348 
349     /// Return true if there is a pending interrupt for the specified vcpu. For KvmSplitIrqChip
350     /// this calls interrupt_requested on the pic.
interrupt_requested(&self, vcpu_id: usize) -> bool351     pub fn interrupt_requested(&self, vcpu_id: usize) -> bool {
352         // Pic interrupts for the split irqchip only go to vcpu 0
353         if vcpu_id != 0 {
354             return false;
355         }
356         self.pic.lock().interrupt_requested()
357     }
358 
359     /// Check if the specified vcpu has any pending interrupts. Returns None for no interrupts,
360     /// otherwise Some(u32) should be the injected interrupt vector. For KvmSplitIrqChip
361     /// this calls get_external_interrupt on the pic.
get_external_interrupt(&self, vcpu_id: usize) -> Option<u32>362     pub fn get_external_interrupt(&self, vcpu_id: usize) -> Option<u32> {
363         // Pic interrupts for the split irqchip only go to vcpu 0
364         if vcpu_id != 0 {
365             return None;
366         }
367         self.pic
368             .lock()
369             .get_external_interrupt()
370             .map(|vector| vector as u32)
371     }
372 
373     /// Register an event that can trigger an interrupt for a particular GSI.
register_irq_event( &mut self, irq: u32, irq_event: &Event, resample_event: Option<&Event>, source: IrqEventSource, ) -> Result<Option<IrqEventIndex>>374     fn register_irq_event(
375         &mut self,
376         irq: u32,
377         irq_event: &Event,
378         resample_event: Option<&Event>,
379         source: IrqEventSource,
380     ) -> Result<Option<IrqEventIndex>> {
381         if irq < self.ioapic_pins as u32 {
382             let mut evt = IrqEvent {
383                 gsi: irq,
384                 event: irq_event.try_clone()?,
385                 resample_event: None,
386                 source,
387             };
388 
389             if let Some(resample_event) = resample_event {
390                 evt.resample_event = Some(resample_event.try_clone()?);
391             }
392 
393             let mut irq_events = self.irq_events.lock();
394             let index = irq_events.len();
395             irq_events.push(Some(evt));
396             Ok(Some(index))
397         } else {
398             self.vm.register_irqfd(irq, irq_event, resample_event)?;
399             Ok(None)
400         }
401     }
402 
403     /// Unregister an event for a particular GSI.
unregister_irq_event(&mut self, irq: u32, irq_event: &Event) -> Result<()>404     fn unregister_irq_event(&mut self, irq: u32, irq_event: &Event) -> Result<()> {
405         if irq < self.ioapic_pins as u32 {
406             let mut irq_events = self.irq_events.lock();
407             for (index, evt) in irq_events.iter().enumerate() {
408                 if let Some(evt) = evt {
409                     if evt.gsi == irq && irq_event.eq(&evt.event) {
410                         irq_events[index] = None;
411                         break;
412                     }
413                 }
414             }
415             Ok(())
416         } else {
417             self.vm.unregister_irqfd(irq, irq_event)
418         }
419     }
420 }
421 
422 /// Convenience function for determining whether or not two irq routes conflict.
423 /// Returns true if they conflict.
routes_conflict(route: &IrqRoute, other: &IrqRoute) -> bool424 fn routes_conflict(route: &IrqRoute, other: &IrqRoute) -> bool {
425     // They don't conflict if they have different GSIs.
426     if route.gsi != other.gsi {
427         return false;
428     }
429 
430     // If they're both MSI with the same GSI then they conflict.
431     if let (IrqSource::Msi { .. }, IrqSource::Msi { .. }) = (route.source, other.source) {
432         return true;
433     }
434 
435     // If the route chips match and they have the same GSI then they conflict.
436     if let (
437         IrqSource::Irqchip {
438             chip: route_chip, ..
439         },
440         IrqSource::Irqchip {
441             chip: other_chip, ..
442         },
443     ) = (route.source, other.source)
444     {
445         return route_chip == other_chip;
446     }
447 
448     // Otherwise they do not conflict.
449     false
450 }
451 
452 /// This IrqChip only works with Kvm so we only implement it for KvmVcpu.
453 impl IrqChip for KvmSplitIrqChip {
454     /// Add a vcpu to the irq chip.
add_vcpu(&mut self, vcpu_id: usize, vcpu: &dyn Vcpu) -> Result<()>455     fn add_vcpu(&mut self, vcpu_id: usize, vcpu: &dyn Vcpu) -> Result<()> {
456         let vcpu: &KvmVcpu = vcpu
457             .downcast_ref()
458             .expect("KvmSplitIrqChip::add_vcpu called with non-KvmVcpu");
459         self.vcpus.lock()[vcpu_id] = Some(vcpu.try_clone()?);
460         Ok(())
461     }
462 
463     /// Register an event that can trigger an interrupt for a particular GSI.
register_edge_irq_event( &mut self, irq: u32, irq_event: &IrqEdgeEvent, source: IrqEventSource, ) -> Result<Option<IrqEventIndex>>464     fn register_edge_irq_event(
465         &mut self,
466         irq: u32,
467         irq_event: &IrqEdgeEvent,
468         source: IrqEventSource,
469     ) -> Result<Option<IrqEventIndex>> {
470         self.register_irq_event(irq, irq_event.get_trigger(), None, source)
471     }
472 
unregister_edge_irq_event(&mut self, irq: u32, irq_event: &IrqEdgeEvent) -> Result<()>473     fn unregister_edge_irq_event(&mut self, irq: u32, irq_event: &IrqEdgeEvent) -> Result<()> {
474         self.unregister_irq_event(irq, irq_event.get_trigger())
475     }
476 
register_level_irq_event( &mut self, irq: u32, irq_event: &IrqLevelEvent, source: IrqEventSource, ) -> Result<Option<IrqEventIndex>>477     fn register_level_irq_event(
478         &mut self,
479         irq: u32,
480         irq_event: &IrqLevelEvent,
481         source: IrqEventSource,
482     ) -> Result<Option<IrqEventIndex>> {
483         self.register_irq_event(
484             irq,
485             irq_event.get_trigger(),
486             Some(irq_event.get_resample()),
487             source,
488         )
489     }
490 
unregister_level_irq_event(&mut self, irq: u32, irq_event: &IrqLevelEvent) -> Result<()>491     fn unregister_level_irq_event(&mut self, irq: u32, irq_event: &IrqLevelEvent) -> Result<()> {
492         self.unregister_irq_event(irq, irq_event.get_trigger())
493     }
494 
495     /// Route an IRQ line to an interrupt controller, or to a particular MSI vector.
route_irq(&mut self, route: IrqRoute) -> Result<()>496     fn route_irq(&mut self, route: IrqRoute) -> Result<()> {
497         let mut routes = self.routes.lock();
498         routes.retain(|r| !routes_conflict(r, &route));
499 
500         routes.push(route);
501 
502         // We only call set_gsi_routing with the msi routes
503         let mut msi_routes = routes.clone();
504         msi_routes.retain(|r| matches!(r.source, IrqSource::Msi { .. }));
505 
506         self.vm.set_gsi_routing(&msi_routes)
507     }
508 
509     /// Replace all irq routes with the supplied routes
set_irq_routes(&mut self, routes: &[IrqRoute]) -> Result<()>510     fn set_irq_routes(&mut self, routes: &[IrqRoute]) -> Result<()> {
511         let mut current_routes = self.routes.lock();
512         *current_routes = routes.to_vec();
513 
514         // We only call set_gsi_routing with the msi routes
515         let mut msi_routes = routes.to_vec();
516         msi_routes.retain(|r| matches!(r.source, IrqSource::Msi { .. }));
517 
518         self.vm.set_gsi_routing(&msi_routes)
519     }
520 
521     /// Return a vector of all registered irq numbers and their associated events and event
522     /// indices. These should be used by the main thread to wait for irq events.
irq_event_tokens(&self) -> Result<Vec<(IrqEventIndex, IrqEventSource, Event)>>523     fn irq_event_tokens(&self) -> Result<Vec<(IrqEventIndex, IrqEventSource, Event)>> {
524         let mut tokens = vec![];
525         for (index, evt) in self.irq_events.lock().iter().enumerate() {
526             if let Some(evt) = evt {
527                 tokens.push((index, evt.source.clone(), evt.event.try_clone()?));
528             }
529         }
530         Ok(tokens)
531     }
532 
533     /// Either assert or deassert an IRQ line.  Sends to either an interrupt controller, or does
534     /// a send_msi if the irq is associated with an MSI.
service_irq(&mut self, irq: u32, level: bool) -> Result<()>535     fn service_irq(&mut self, irq: u32, level: bool) -> Result<()> {
536         let chips = self.routes_to_chips(irq);
537         for (chip, pin) in chips {
538             match chip {
539                 IrqSourceChip::PicPrimary | IrqSourceChip::PicSecondary => {
540                     self.pic.lock().service_irq(pin as u8, level);
541                 }
542                 IrqSourceChip::Ioapic => {
543                     self.ioapic.lock().service_irq(pin as usize, level);
544                 }
545                 _ => {}
546             }
547         }
548         Ok(())
549     }
550 
551     /// Service an IRQ event by asserting then deasserting an IRQ line. The associated Event
552     /// that triggered the irq event will be read from. If the irq is associated with a resample
553     /// Event, then the deassert will only happen after an EOI is broadcast for a vector
554     /// associated with the irq line.
555     /// For the KvmSplitIrqChip, this function identifies which chips the irq routes to, then
556     /// attempts to call service_irq on those chips. If the ioapic is unable to be immediately
557     /// locked, we add the irq to the delayed_ioapic_irq_events Vec (though we still read
558     /// from the Event that triggered the irq event).
service_irq_event(&mut self, event_index: IrqEventIndex) -> Result<()>559     fn service_irq_event(&mut self, event_index: IrqEventIndex) -> Result<()> {
560         if let Some(evt) = &self.irq_events.lock()[event_index] {
561             evt.event.wait()?;
562             let chips = self.routes_to_chips(evt.gsi);
563 
564             for (chip, pin) in chips {
565                 match chip {
566                     IrqSourceChip::PicPrimary | IrqSourceChip::PicSecondary => {
567                         let mut pic = self.pic.lock();
568                         pic.service_irq(pin as u8, true);
569                         if evt.resample_event.is_none() {
570                             pic.service_irq(pin as u8, false);
571                         }
572                     }
573                     IrqSourceChip::Ioapic => {
574                         if let Ok(mut ioapic) = self.ioapic.try_lock() {
575                             ioapic.service_irq(pin as usize, true);
576                             if evt.resample_event.is_none() {
577                                 ioapic.service_irq(pin as usize, false);
578                             }
579                         } else {
580                             self.delayed_ioapic_irq_events.lock().push(event_index);
581                             self.delayed_ioapic_irq_trigger.signal().unwrap();
582                         }
583                     }
584                     _ => {}
585                 }
586             }
587         }
588 
589         Ok(())
590     }
591 
592     /// Broadcast an end of interrupt. For KvmSplitIrqChip this sends the EOI to the ioapic
broadcast_eoi(&self, vector: u8) -> Result<()>593     fn broadcast_eoi(&self, vector: u8) -> Result<()> {
594         self.ioapic.lock().end_of_interrupt(vector);
595         Ok(())
596     }
597 
598     /// Injects any pending interrupts for `vcpu`.
599     /// For KvmSplitIrqChip this injects any PIC interrupts on vcpu_id 0.
inject_interrupts(&self, vcpu: &dyn Vcpu) -> Result<()>600     fn inject_interrupts(&self, vcpu: &dyn Vcpu) -> Result<()> {
601         let vcpu: &KvmVcpu = vcpu
602             .downcast_ref()
603             .expect("KvmSplitIrqChip::add_vcpu called with non-KvmVcpu");
604 
605         let vcpu_id = vcpu.id();
606         if !self.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
607             return Ok(());
608         }
609 
610         if let Some(vector) = self.get_external_interrupt(vcpu_id) {
611             vcpu.interrupt(vector)?;
612         }
613 
614         // The second interrupt request should be handled immediately, so ask vCPU to exit as soon
615         // as possible.
616         if self.interrupt_requested(vcpu_id) {
617             vcpu.set_interrupt_window_requested(true);
618         }
619         Ok(())
620     }
621 
622     /// Notifies the irq chip that the specified VCPU has executed a halt instruction.
623     /// For KvmSplitIrqChip this is a no-op because KVM handles VCPU blocking.
halted(&self, _vcpu_id: usize)624     fn halted(&self, _vcpu_id: usize) {}
625 
626     /// Blocks until `vcpu` is in a runnable state or until interrupted by
627     /// `IrqChip::kick_halted_vcpus`.  Returns `VcpuRunState::Runnable if vcpu is runnable, or
628     /// `VcpuRunState::Interrupted` if the wait was interrupted.
629     /// For KvmSplitIrqChip this is a no-op and always returns Runnable because KVM handles VCPU
630     /// blocking.
wait_until_runnable(&self, _vcpu: &dyn Vcpu) -> Result<VcpuRunState>631     fn wait_until_runnable(&self, _vcpu: &dyn Vcpu) -> Result<VcpuRunState> {
632         Ok(VcpuRunState::Runnable)
633     }
634 
635     /// Makes unrunnable VCPUs return immediately from `wait_until_runnable`.
636     /// For KvmSplitIrqChip this is a no-op because KVM handles VCPU blocking.
kick_halted_vcpus(&self)637     fn kick_halted_vcpus(&self) {}
638 
639     /// Get the current MP state of the specified VCPU.
get_mp_state(&self, vcpu_id: usize) -> Result<MPState>640     fn get_mp_state(&self, vcpu_id: usize) -> Result<MPState> {
641         match self.vcpus.lock().get(vcpu_id) {
642             Some(Some(vcpu)) => Ok(MPState::from(&vcpu.get_mp_state()?)),
643             _ => Err(Error::new(libc::ENOENT)),
644         }
645     }
646 
647     /// Set the current MP state of the specified VCPU.
set_mp_state(&mut self, vcpu_id: usize, state: &MPState) -> Result<()>648     fn set_mp_state(&mut self, vcpu_id: usize, state: &MPState) -> Result<()> {
649         match self.vcpus.lock().get(vcpu_id) {
650             Some(Some(vcpu)) => vcpu.set_mp_state(&kvm_mp_state::from(state)),
651             _ => Err(Error::new(libc::ENOENT)),
652         }
653     }
654 
655     /// Attempt to clone this IrqChip instance.
try_clone(&self) -> Result<Self>656     fn try_clone(&self) -> Result<Self> {
657         Ok(KvmSplitIrqChip {
658             vm: self.vm.try_clone()?,
659             vcpus: self.vcpus.clone(),
660             routes: self.routes.clone(),
661             pit: self.pit.clone(),
662             pic: self.pic.clone(),
663             ioapic: self.ioapic.clone(),
664             ioapic_pins: self.ioapic_pins,
665             delayed_ioapic_irq_events: self.delayed_ioapic_irq_events.clone(),
666             delayed_ioapic_irq_trigger: Event::new()?,
667             irq_events: self.irq_events.clone(),
668         })
669     }
670 
671     /// Finalize irqchip setup. Should be called once all devices have registered irq events and
672     /// been added to the io_bus and mmio_bus.
finalize_devices( &mut self, resources: &mut SystemAllocator, io_bus: &Bus, mmio_bus: &Bus, ) -> Result<()>673     fn finalize_devices(
674         &mut self,
675         resources: &mut SystemAllocator,
676         io_bus: &Bus,
677         mmio_bus: &Bus,
678     ) -> Result<()> {
679         // Insert pit into io_bus
680         io_bus.insert(self.pit.clone(), 0x040, 0x8).unwrap();
681         io_bus.insert(self.pit.clone(), 0x061, 0x1).unwrap();
682 
683         // Insert pic into io_bus
684         io_bus.insert(self.pic.clone(), 0x20, 0x2).unwrap();
685         io_bus.insert(self.pic.clone(), 0xa0, 0x2).unwrap();
686         io_bus.insert(self.pic.clone(), 0x4d0, 0x2).unwrap();
687 
688         // Insert ioapic into mmio_bus
689         mmio_bus
690             .insert(
691                 self.ioapic.clone(),
692                 IOAPIC_BASE_ADDRESS,
693                 IOAPIC_MEM_LENGTH_BYTES,
694             )
695             .unwrap();
696 
697         // At this point, all of our devices have been created and they have registered their
698         // irq events, so we can clone our resample events
699         let mut ioapic_resample_events: Vec<Vec<Event>> =
700             (0..self.ioapic_pins).map(|_| Vec::new()).collect();
701         let mut pic_resample_events: Vec<Vec<Event>> =
702             (0..self.ioapic_pins).map(|_| Vec::new()).collect();
703 
704         for evt in self.irq_events.lock().iter().flatten() {
705             if (evt.gsi as usize) >= self.ioapic_pins {
706                 continue;
707             }
708             if let Some(resample_evt) = &evt.resample_event {
709                 ioapic_resample_events[evt.gsi as usize].push(resample_evt.try_clone()?);
710                 pic_resample_events[evt.gsi as usize].push(resample_evt.try_clone()?);
711             }
712         }
713 
714         // Register resample events with the ioapic
715         self.ioapic
716             .lock()
717             .register_resample_events(ioapic_resample_events);
718         // Register resample events with the pic
719         self.pic
720             .lock()
721             .register_resample_events(pic_resample_events);
722 
723         // Make sure all future irq numbers are beyond IO-APIC range.
724         let mut irq_num = resources.allocate_irq().unwrap();
725         while irq_num < self.ioapic_pins as u32 {
726             irq_num = resources.allocate_irq().unwrap();
727         }
728 
729         Ok(())
730     }
731 
732     /// The KvmSplitIrqChip's ioapic may be locked because a vcpu thread is currently writing to
733     /// the ioapic, and the ioapic may be blocking on adding MSI routes, which requires blocking
734     /// socket communication back to the main thread.  Thus, we do not want the main thread to
735     /// block on a locked ioapic, so any irqs that could not be serviced because the ioapic could
736     /// not be immediately locked are added to the delayed_ioapic_irq_events Vec. This function
737     /// processes each delayed event in the vec each time it's called. If the ioapic is still
738     /// locked, we keep the queued irqs for the next time this function is called.
process_delayed_irq_events(&mut self) -> Result<()>739     fn process_delayed_irq_events(&mut self) -> Result<()> {
740         self.delayed_ioapic_irq_events
741             .lock()
742             .retain(|&event_index| {
743                 if let Some(evt) = &self.irq_events.lock()[event_index] {
744                     if let Ok(mut ioapic) = self.ioapic.try_lock() {
745                         ioapic.service_irq(evt.gsi as usize, true);
746                         if evt.resample_event.is_none() {
747                             ioapic.service_irq(evt.gsi as usize, false);
748                         }
749 
750                         false
751                     } else {
752                         true
753                     }
754                 } else {
755                     true
756                 }
757             });
758 
759         if self.delayed_ioapic_irq_events.lock().is_empty() {
760             self.delayed_ioapic_irq_trigger.wait()?;
761         }
762 
763         Ok(())
764     }
765 
irq_delayed_event_token(&self) -> Result<Option<Event>>766     fn irq_delayed_event_token(&self) -> Result<Option<Event>> {
767         Ok(Some(self.delayed_ioapic_irq_trigger.try_clone()?))
768     }
769 
check_capability(&self, c: IrqChipCap) -> bool770     fn check_capability(&self, c: IrqChipCap) -> bool {
771         match c {
772             IrqChipCap::TscDeadlineTimer => self
773                 .vm
774                 .get_hypervisor()
775                 .check_capability(HypervisorCap::TscDeadlineTimer),
776             IrqChipCap::X2Apic => true,
777             IrqChipCap::MpStateGetSet => true,
778         }
779     }
780 }
781 
/// Chip-specific portion of a `KvmSplitIrqChip` snapshot.
///
/// Serialized by `snapshot_chip_specific` and deserialized again by `restore_chip_specific`.
#[derive(Serialize, Deserialize)]
struct KvmSplitIrqChipSnapshot {
    /// Copy of the chip's irq routes, re-applied through `set_irq_routes` on restore.
    routes: Vec<IrqRoute>,
}
786 
787 impl IrqChipX86_64 for KvmSplitIrqChip {
try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>>788     fn try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>> {
789         Ok(Box::new(self.try_clone()?))
790     }
791 
as_irq_chip(&self) -> &dyn IrqChip792     fn as_irq_chip(&self) -> &dyn IrqChip {
793         self
794     }
795 
as_irq_chip_mut(&mut self) -> &mut dyn IrqChip796     fn as_irq_chip_mut(&mut self) -> &mut dyn IrqChip {
797         self
798     }
799 
800     /// Get the current state of the PIC
get_pic_state(&self, select: PicSelect) -> Result<PicState>801     fn get_pic_state(&self, select: PicSelect) -> Result<PicState> {
802         Ok(self.pic.lock().get_pic_state(select))
803     }
804 
805     /// Set the current state of the PIC
set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()>806     fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> {
807         self.pic.lock().set_pic_state(select, state);
808         Ok(())
809     }
810 
811     /// Get the current state of the IOAPIC
get_ioapic_state(&self) -> Result<IoapicState>812     fn get_ioapic_state(&self) -> Result<IoapicState> {
813         Ok(self.ioapic.lock().get_ioapic_state())
814     }
815 
816     /// Set the current state of the IOAPIC
set_ioapic_state(&mut self, state: &IoapicState) -> Result<()>817     fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> {
818         self.ioapic.lock().set_ioapic_state(state);
819         Ok(())
820     }
821 
822     /// Get the current state of the specified VCPU's local APIC
get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState>823     fn get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
824         match self.vcpus.lock().get(vcpu_id) {
825             Some(Some(vcpu)) => Ok(LapicState::from(&vcpu.get_lapic()?)),
826             _ => Err(Error::new(libc::ENOENT)),
827         }
828     }
829 
830     /// Set the current state of the specified VCPU's local APIC
set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()>831     fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
832         match self.vcpus.lock().get(vcpu_id) {
833             Some(Some(vcpu)) => vcpu.set_lapic(&kvm_lapic_state::from(state)),
834             _ => Err(Error::new(libc::ENOENT)),
835         }
836     }
837 
838     /// Get the lapic frequency in Hz
lapic_frequency(&self) -> u32839     fn lapic_frequency(&self) -> u32 {
840         // KVM emulates the lapic to have a bus frequency of 1GHz
841         1_000_000_000
842     }
843 
844     /// Retrieves the state of the PIT. Gets the pit state via the KVM API.
get_pit(&self) -> Result<PitState>845     fn get_pit(&self) -> Result<PitState> {
846         Ok(self.pit.lock().get_pit_state())
847     }
848 
849     /// Sets the state of the PIT. Sets the pit state via the KVM API.
set_pit(&mut self, state: &PitState) -> Result<()>850     fn set_pit(&mut self, state: &PitState) -> Result<()> {
851         self.pit.lock().set_pit_state(state);
852         Ok(())
853     }
854 
855     /// Returns true if the PIT uses port 0x61 for the PC speaker, false if 0x61 is unused.
856     /// devices::Pit uses 0x61.
pit_uses_speaker_port(&self) -> bool857     fn pit_uses_speaker_port(&self) -> bool {
858         true
859     }
860 
snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value>861     fn snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value> {
862         serde_json::to_value(KvmSplitIrqChipSnapshot {
863             routes: self.routes.lock().clone(),
864         })
865         .context("failed to serialize KvmSplitIrqChip")
866     }
867 
restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()>868     fn restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
869         let deser: KvmSplitIrqChipSnapshot =
870             serde_json::from_value(data).context("failed to deserialize KvmSplitIrqChip")?;
871         self.set_irq_routes(&deser.routes)?;
872         Ok(())
873     }
874 }
875