• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2019 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 use std::convert::TryInto;
6 
7 use anyhow::Context;
8 use base::error;
9 use base::info;
10 use base::AsRawDescriptor;
11 use base::Error as SysError;
12 use base::Event;
13 use base::RawDescriptor;
14 use base::Tube;
15 use base::TubeError;
16 use bit_field::*;
17 use remain::sorted;
18 use serde::Deserialize;
19 use serde::Serialize;
20 use snapshot::AnySnapshot;
21 use thiserror::Error;
22 use vm_control::VmIrqRequest;
23 use vm_control::VmIrqResponse;
24 use zerocopy::FromBytes;
25 use zerocopy::Immutable;
26 use zerocopy::IntoBytes;
27 use zerocopy::KnownLayout;
28 
29 use crate::pci::pci_configuration::PciCapConfig;
30 use crate::pci::pci_configuration::PciCapConfigWriteResult;
31 use crate::pci::PciCapability;
32 use crate::pci::PciCapabilityID;
33 
/// Maximum number of MSI-X vectors a single device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Each MSI-X table entry occupies 16 bytes (4 DWORDs).
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// Each PBA entry occupies 8 bytes (one QWORD of pending bits).
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits packed into one PBA entry.
pub const BITS_PER_PBA_ENTRY: usize = 64;
/// Function Mask bit (bit 14) of the MSI-X Message Control word.
const FUNCTION_MASK_BIT: u16 = 0x4000;
/// MSI-X Enable bit (bit 15) of the MSI-X Message Control word.
const MSIX_ENABLE_BIT: u16 = 0x8000;
/// Mask bit (bit 0) of a table entry's Vector Control DWORD.
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;
41 
/// One entry of the MSI-X table: message address/data plus vector control.
#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    // Low 32 bits of the message address.
    msg_addr_lo: u32,
    // High 32 bits of the message address.
    msg_addr_hi: u32,
    // Data written to the message address when the vector fires.
    msg_data: u32,
    // Vector Control DWORD; only bit 0 (Mask) is used by this code.
    vector_ctl: u32,
}
49 
50 impl MsixTableEntry {
masked(&self) -> bool51     fn masked(&self) -> bool {
52         self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
53     }
54 }
55 
/// Pairing of an irq eventfd with the guest GSI it is routed to.
struct IrqfdGsi {
    // Event signaled to inject the interrupt.
    irqfd: Event,
    // Global system interrupt number allocated for this vector.
    gsi: u32,
}
60 
/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // Guest-visible MSI-X table, one entry per vector.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array; 64 pending bits per u64 entry.
    pba_entries: Vec<u64>,
    // Allocated irqfd/GSI pair per vector; None until the vector is enabled.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // Function Mask bit state from the Message Control word.
    masked: bool,
    // MSI-X Enable bit state from the Message Control word.
    enabled: bool,
    // Tube used to request MSI allocation/routing.
    msi_device_socket: Tube,
    // Total number of vectors this device exposes.
    msix_num: u16,
    // PCI device id, sent along with irq requests.
    pci_id: u32,
    // Device name, used for logging and irq requests.
    device_name: String,
}
73 
/// Serializable mirror of [`MsixConfig`] used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    /// (The irqfds themselves are re-allocated on restore.)
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    device_name: String,
}
86 
87 #[sorted]
88 #[derive(Error, Debug)]
89 pub enum MsixError {
90     #[error("AddMsiRoute failed: {0}")]
91     AddMsiRoute(SysError),
92     #[error("failed to receive AddMsiRoute response: {0}")]
93     AddMsiRouteRecv(TubeError),
94     #[error("failed to send AddMsiRoute request: {0}")]
95     AddMsiRouteSend(TubeError),
96     #[error("AllocateOneMsi failed: {0}")]
97     AllocateOneMsi(SysError),
98     #[error("failed to receive AllocateOneMsi response: {0}")]
99     AllocateOneMsiRecv(TubeError),
100     #[error("failed to send AllocateOneMsi request: {0}")]
101     AllocateOneMsiSend(TubeError),
102     #[error("failed to deserialize snapshot: {0}")]
103     DeserializationFailed(anyhow::Error),
104     #[error("invalid vector length in snapshot: {0}")]
105     InvalidVectorLength(std::num::TryFromIntError),
106     #[error("ReleaseOneIrq failed: {0}")]
107     ReleaseOneIrq(base::Error),
108     #[error("failed to receive ReleaseOneIrq response: {0}")]
109     ReleaseOneIrqRecv(TubeError),
110     #[error("failed to send ReleaseOneIrq request: {0}")]
111     ReleaseOneIrqSend(TubeError),
112 }
113 
/// Convenience alias for results of MSI-X operations.
type MsixResult<T> = std::result::Result<T, MsixError>;
115 
/// Result of a write to the MSI-X capability or table, reported so callers
/// can react to mask/enable transitions.
#[derive(Copy, Clone)]
pub enum MsixStatus {
    /// The function-wide mask or enable state changed.
    Changed,
    /// A single table entry (by index) changed mask state.
    EntryChanged(usize),
    /// The write changed no state callers need to act on.
    NothingToDo,
}

impl PciCapConfigWriteResult for MsixStatus {}
124 
125 impl MsixConfig {
new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self126     pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
127         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
128 
129         let mut table_entries: Vec<MsixTableEntry> = Vec::new();
130         table_entries.resize_with(msix_vectors as usize, Default::default);
131         table_entries
132             .iter_mut()
133             .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
134         let mut pba_entries: Vec<u64> = Vec::new();
135         let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY);
136         pba_entries.resize_with(num_pba_entries, Default::default);
137 
138         let mut irq_vec = Vec::new();
139         irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);
140 
141         MsixConfig {
142             table_entries,
143             pba_entries,
144             irq_vec,
145             masked: false,
146             enabled: false,
147             msi_device_socket: vm_socket,
148             msix_num: msix_vectors,
149             pci_id,
150             device_name,
151         }
152     }
153 
    /// Get the number of MSI-X vectors in this configuration.
    /// This count is fixed when the config is constructed.
    pub fn num_vectors(&self) -> u16 {
        self.msix_num
    }
158 
    /// Check whether the Function Mask bit in the Message Control word is set.
    /// If 1, all of the vectors associated with the function are masked,
    /// regardless of their per-vector Mask bit states.
    /// If 0, each vector's Mask bit determines whether the vector is masked or not.
    pub fn masked(&self) -> bool {
        self.masked
    }
166 
167     /// Check whether the Function Mask bit in MSIX table Message Control
168     /// word in set or not.
169     /// If true, the vector is masked.
170     /// If false, the vector is unmasked.
table_masked(&self, index: usize) -> bool171     pub fn table_masked(&self, index: usize) -> bool {
172         if index >= self.table_entries.len() {
173             true
174         } else {
175             self.table_entries[index].masked()
176         }
177     }
178 
    /// Check whether the MSI-X Enable bit in the Message Control word is set.
    /// If 1, the function is permitted to use MSI-X to request service.
    pub fn enabled(&self) -> bool {
        self.enabled
    }
184 
185     /// Read the MSI-X Capability Structure.
186     /// The top 2 bits in Message Control word are emulated and all other
187     /// bits are read only.
read_msix_capability(&self, data: u32) -> u32188     pub fn read_msix_capability(&self, data: u32) -> u32 {
189         let mut msg_ctl = (data >> 16) as u16;
190         msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);
191 
192         if self.enabled {
193             msg_ctl |= MSIX_ENABLE_BIT;
194         }
195         if self.masked {
196             msg_ctl |= FUNCTION_MASK_BIT;
197         }
198         (msg_ctl as u32) << 16 | (data & u16::MAX as u32)
199     }
200 
    /// Write to the MSI-X Capability Structure.
    /// Only the top 2 bits in Message Control Word are writable.
    ///
    /// Only a 2-byte write at offset 2 (the Message Control word) is
    /// accepted; any other access is logged and ignored.
    ///
    /// Returns `MsixStatus::Changed` when the Function Mask state changed,
    /// `MsixStatus::NothingToDo` otherwise.
    // NOTE(review): a 1->0 transition of the Enable bit updates
    // `self.enabled` but does not release the allocated irqfds -- confirm
    // this is intentional (they appear to be reused on re-enable).
    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        if offset == 2 && data.len() == 2 {
            let reg = u16::from_le_bytes([data[0], data[1]]);
            let old_masked = self.masked;
            let old_enabled = self.enabled;

            self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
            self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;

            // 0->1 transition of the Enable bit: allocate and route irqfds
            // for all currently eligible vectors. Roll back on failure.
            if !old_enabled && self.enabled {
                if let Err(e) = self.msix_enable_all() {
                    error!("failed to enable MSI-X: {}", e);
                    self.enabled = false;
                }
            }

            // If the Function Mask bit was set, and has just been cleared, it's
            // important to go through the entire PBA to check if there was any
            // pending MSI-X message to inject, given that the vector is not
            // masked.
            if old_masked && !self.masked {
                for (index, entry) in self.table_entries.clone().iter().enumerate() {
                    if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                        self.inject_msix_and_clear_pba(index);
                    }
                }
                return MsixStatus::Changed;
            } else if !old_masked && self.masked {
                return MsixStatus::Changed;
            }
        } else {
            error!(
                "invalid write to MSI-X Capability Structure offset {:x}",
                offset
            );
        }
        MsixStatus::NothingToDo
    }
241 
242     /// Create a snapshot of the current MsixConfig struct for use in
243     /// snapshotting.
snapshot(&mut self) -> anyhow::Result<AnySnapshot>244     pub fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
245         AnySnapshot::to_any(MsixConfigSnapshot {
246             table_entries: self.table_entries.clone(),
247             pba_entries: self.pba_entries.clone(),
248             masked: self.masked,
249             enabled: self.enabled,
250             msix_num: self.msix_num,
251             pci_id: self.pci_id,
252             device_name: self.device_name.clone(),
253             irq_gsi_vec: self
254                 .irq_vec
255                 .iter()
256                 .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi))
257                 .collect(),
258         })
259         .context("failed to serialize MsixConfigSnapshot")
260     }
261 
    /// Restore a MsixConfig struct based on a snapshot. In short, this will
    /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they
    /// will be re-wired to the irq chip).
    ///
    /// Any MSIs registered before the restore are released first, since the
    /// snapshot's GSI routing may differ from the current one.
    pub fn restore(&mut self, snapshot: AnySnapshot) -> MsixResult<()> {
        let snapshot: MsixConfigSnapshot =
            AnySnapshot::from_any(snapshot).map_err(MsixError::DeserializationFailed)?;

        // Restore the MMIO-visible state wholesale.
        self.table_entries = snapshot.table_entries;
        self.pba_entries = snapshot.pba_entries;
        self.masked = snapshot.masked;
        self.enabled = snapshot.enabled;
        self.msix_num = snapshot.msix_num;
        self.pci_id = snapshot.pci_id;
        self.device_name = snapshot.device_name;

        // Drop any previously allocated irqfds, then re-allocate one per
        // vector that was active when the snapshot was taken.
        self.msix_release_all()?;
        self.irq_vec
            .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>);
        for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() {
            if let Some(gsi_num) = gsi {
                self.msix_restore_one(vector, *gsi_num)?;
            } else {
                info!(
                    "skipping restore of vector {} for device {}",
                    vector, self.device_name
                );
            }
        }
        Ok(())
    }
292 
    /// Restore the specified MSI-X vector.
    ///
    /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for
    /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must
    /// have passed those checks.
    fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> {
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        // Ask the irq chip to register this new irqfd at the exact GSI the
        // vector had when the snapshot was taken.
        let request = VmIrqRequest::AllocateOneMsiAtGsi {
            irqfd,
            gsi,
            device_id: self.pci_id,
            queue_id: index,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            return Err(MsixError::AllocateOneMsi(e));
        };

        // Recover the irqfd by destructuring the request we still own.
        self.irq_vec[index] = Some(IrqfdGsi {
            irqfd: match request {
                VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi,
        });
        // Re-program the route with the entry's address/data.
        self.add_msi_route(index as u16, gsi)?;
        Ok(())
    }
328 
    /// On warm restore, there could already be MSIs registered. We need to
    /// release them in case the routing has changed (e.g. different
    /// data <-> GSI).
    ///
    /// Drains `irq_vec`, sending a ReleaseOneIrq request for each allocated
    /// vector; stops at the first failure.
    fn msix_release_all(&mut self) -> MsixResult<()> {
        for irqfd_gsi in self.irq_vec.drain(..).flatten() {
            let request = VmIrqRequest::ReleaseOneIrq {
                gsi: irqfd_gsi.gsi,
                irqfd: irqfd_gsi.irqfd,
            };

            self.msi_device_socket
                .send(&request)
                .map_err(MsixError::ReleaseOneIrqSend)?;
            if let VmIrqResponse::Err(e) = self
                .msi_device_socket
                .recv()
                .map_err(MsixError::ReleaseOneIrqRecv)?
            {
                return Err(MsixError::ReleaseOneIrq(e));
            }
        }
        Ok(())
    }
352 
    /// Program the irq chip route for `gsi` using the message address/data
    /// currently stored in table entry `index`.
    ///
    /// An all-zero message address means the guest has not configured the
    /// entry yet; nothing is routed and `Ok(())` is returned.
    fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> {
        // Read the 64-bit message address (entry bytes 0..8) and the 32-bit
        // message data (entry bytes 8..12) back out of the table.
        let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
        self.read_msix_table((index * 16).into(), data.as_mut());
        let msi_address: u64 = u64::from_le_bytes(data);
        let mut data: [u8; 4] = [0, 0, 0, 0];
        self.read_msix_table((index * 16 + 8).into(), data.as_mut());
        let msi_data: u32 = u32::from_le_bytes(data);

        if msi_address == 0 {
            return Ok(());
        }

        self.msi_device_socket
            .send(&VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            })
            .map_err(MsixError::AddMsiRouteSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AddMsiRouteRecv)?
        {
            return Err(MsixError::AddMsiRoute(e));
        }
        Ok(())
    }
381 
382     // Enable MSI-X
msix_enable_all(&mut self) -> MsixResult<()>383     fn msix_enable_all(&mut self) -> MsixResult<()> {
384         for index in 0..self.irq_vec.len() {
385             self.msix_enable_one(index)?;
386         }
387         Ok(())
388     }
389 
    // Use a new MSI-X vector
    // Create a new eventfd and bind them to a new msi
    //
    // No-op (Ok) when the vector already has an irqfd, when MSI-X is disabled
    // or function-masked, or when the entry itself is masked; in those cases
    // allocation is deferred until the relevant unmask/enable write.
    fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
        if self.irq_vec[index].is_some()
            || !self.enabled()
            || self.masked()
            || self.table_masked(index)
        {
            return Ok(());
        }
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        // Ask the irq chip to allocate a fresh GSI for this irqfd.
        let request = VmIrqRequest::AllocateOneMsi {
            irqfd,
            device_id: self.pci_id,
            queue_id: index,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        let irq_num: u32 = match self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            VmIrqResponse::AllocateOneMsi { gsi } => gsi,
            VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
            _ => unreachable!(),
        };
        // Recover the irqfd by destructuring the request we still own, and
        // remember the GSI it was bound to.
        self.irq_vec[index] = Some(IrqfdGsi {
            irqfd: match request {
                VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi: irq_num,
        });

        self.add_msi_route(index as u16, irq_num)?;
        Ok(())
    }
430 
    /// Read MSI-X table
    ///  # Arguments
    ///  * 'offset' - the offset within the MSI-X Table
    ///  * 'data' - used to store the read results
    ///
    /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full
    /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined.
    ///
    ///   location: DWORD3            DWORD2      DWORD1            DWORD0
    ///   entry 0:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
    ///   entry 1:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
    ///   entry 2:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
    ///   ...
    ///
    /// Invalid index/offset/length accesses are logged; `data` is then left
    /// untouched (bad index/length) or zero-filled (bad offset).
    pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
        // Each table entry is 16 bytes: split the offset into the entry index
        // and the DWORD offset within that entry.
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            error!("invalid MSI-X table index {}", index);
            return;
        }

        match data.len() {
            4 => {
                // Aligned DWORD read: one field of the entry.
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                // Aligned QWORD read: two adjacent fields, low DWORD first.
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        };
    }
489 
    /// Write to MSI-X table
    ///
    /// Message Address: the contents of this field specifies the address
    ///     for the memory write transaction; different MSI-X vectors have
    ///     different Message Address values
    /// Message Data: the contents of this field specifies the data driven
    ///     on AD\[31::00\] during the memory write transaction's data phase.
    /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
    ///     is set, the function is prohibited from sending a message using
    ///     this MSI-X Table entry.
    ///
    /// Only aligned 4- or 8-byte writes are applied; others are logged and
    /// dropped. Returns `MsixStatus::EntryChanged(index)` when the entry's
    /// mask state changed, `MsixStatus::NothingToDo` otherwise.
    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        // Each table entry is 16 bytes: split offset into entry index and the
        // DWORD offset within the entry.
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            error!("invalid MSI-X table index {}", index);
            return MsixStatus::NothingToDo;
        }

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = u32::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => self.table_entries[index].vector_ctl = value,
                    _ => error!("invalid offset"),
                };
            }
            8 => {
                let value = u64::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };
            }
            _ => error!("invalid data length"),
        };

        let new_entry = self.table_entries[index].clone();

        // This MSI-X vector is enabled for the first time.
        if self.enabled()
            && !self.masked()
            && self.irq_vec[index].is_none()
            && old_entry.masked()
            && !new_entry.masked()
        {
            // Allocate the irqfd lazily on first unmask; on failure, re-mask
            // the entry so guest-visible state stays consistent.
            if let Err(e) = self.msix_enable_one(index) {
                error!("failed to enable MSI-X vector {}: {}", index, e);
                self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
            }
            return MsixStatus::EntryChanged(index);
        }

        // Address/data of an already-routed vector changed: update the route.
        if self.enabled()
            && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
                || old_entry.msg_addr_hi != new_entry.msg_addr_hi
                || old_entry.msg_data != new_entry.msg_data)
        {
            if let Some(irqfd_gsi) = &self.irq_vec[index] {
                let irq_num = irqfd_gsi.gsi;
                if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                    error!("add_msi_route failed: {}", e);
                }
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked() {
            if old_entry.masked() && !self.table_entries[index].masked() {
                if self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
                return MsixStatus::EntryChanged(index);
            } else if !old_entry.masked() && self.table_entries[index].masked() {
                return MsixStatus::EntryChanged(index);
            }
        }
        MsixStatus::NothingToDo
    }
590 
    /// Read PBA Entries
    ///  # Arguments
    ///  * 'offset' - the offset within the PBA entries
    ///  * 'data' - used to store the read results
    ///
    /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function
    /// has a pending message for the associated MSI-X Table entry.
    ///
    /// Only aligned 4- or 8-byte reads are served; invalid accesses are
    /// logged and `data` is left untouched or zero-filled.
    pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
        // Each PBA entry is 8 bytes: split offset into entry index and the
        // byte offset within the entry.
        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        if index >= self.pba_entries.len() {
            error!("invalid PBA index {}", index);
            return;
        }

        match data.len() {
            4 => {
                // DWORD read: low or high half of the 64-bit entry.
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                // QWORD read: the whole entry, only valid at offset 0.
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        }
    }
634 
    /// Write to PBA Entries
    ///
    /// Software should never write, and should only read Pending Bits.
    /// If software writes to Pending Bits, the result is undefined.
    ///
    /// This implementation logs and discards all such writes.
    pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }
642 
set_pba_bit(&mut self, vector: u16, set: bool)643     fn set_pba_bit(&mut self, vector: u16, set: bool) {
644         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
645 
646         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
647         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
648         let mut mask: u64 = (1 << shift) as u64;
649 
650         if set {
651             self.pba_entries[index] |= mask;
652         } else {
653             mask = !mask;
654             self.pba_entries[index] &= mask;
655         }
656     }
657 
get_pba_bit(&self, vector: u16) -> u8658     fn get_pba_bit(&self, vector: u16) -> u8 {
659         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
660 
661         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
662         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
663 
664         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
665     }
666 
    /// Signal the irqfd for `vector` (if one is allocated) and clear the
    /// vector's pending bit in the PBA.
    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        if let Some(irq) = &self.irq_vec[vector] {
            irq.irqfd.signal().unwrap();
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, false);
    }
675 
    /// Inject virtual interrupt to the guest
    ///
    ///  # Arguments
    ///  * 'vector' - the index to the MSI-X Table entry
    ///
    /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
    /// prohibited from sending the associated message, and the function
    /// must set the associated Pending bit whenever the function would
    /// otherwise send the message. When software unmasks a vector whose
    /// associated Pending bit is set, the function must schedule sending
    /// the associated message, and clear the Pending bit as soon as the
    /// message has been sent.
    ///
    /// If the vector is unmasked, writing to irqfd which wakes up KVM to
    /// inject virtual interrupt to the guest.
    ///
    /// NOTE(review): `table_entries[vector]` panics for an out-of-range
    /// vector while the `irq_vec` lookup below is checked -- confirm callers
    /// always pass vector < num_vectors().
    pub fn trigger(&mut self, vector: u16) {
        if self.table_entries[vector as usize].masked() || self.masked() {
            // Masked: record the interrupt as pending instead of injecting.
            self.set_pba_bit(vector, true);
        } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
            irq.irqfd.signal().unwrap();
        }
    }
698 
    /// Return the raw descriptor of the MSI device socket
    ///
    /// The descriptor is borrowed from the Tube owned by this struct; the
    /// caller must not close it.
    pub fn get_msi_socket(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
703 
704     /// Return irqfd of MSI-X Table entry
705     ///
706     ///  # Arguments
707     ///  * 'vector' - the index to the MSI-X table entry
get_irqfd(&self, vector: usize) -> Option<&Event>708     pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
709         match self.irq_vec.get(vector).unwrap_or(&None) {
710             Some(irq) => Some(&irq.irqfd),
711             None => None,
712         }
713     }
714 
destroy(&mut self)715     pub fn destroy(&mut self) {
716         while let Some(irq) = self.irq_vec.pop() {
717             if let Some(irq) = irq {
718                 let request = VmIrqRequest::ReleaseOneIrq {
719                     gsi: irq.gsi,
720                     irqfd: irq.irqfd,
721                 };
722                 if self.msi_device_socket.send(&request).is_err() {
723                     continue;
724                 }
725                 let _ = self.msi_device_socket.recv::<VmIrqResponse>();
726             }
727         }
728     }
729 }
730 
// Only the top two bits of the first capability register (the MSI-X Enable
// and Function Mask bits of the Message Control word) are emulated by
// MsixConfig on reads.
const MSIX_CONFIG_READ_MASK: [u32; 3] = [0xc000_0000, 0, 0];
732 
impl PciCapConfig for MsixConfig {
    /// Per-register masks of the capability bits backed by this struct's
    /// state (see `MSIX_CONFIG_READ_MASK`).
    fn read_mask(&self) -> &'static [u32] {
        &MSIX_CONFIG_READ_MASK
    }

    /// Only register 0 (containing the Message Control word) has emulated
    /// state; all other registers read as zero from here.
    fn read_reg(&self, reg_idx: usize) -> u32 {
        if reg_idx == 0 {
            self.read_msix_capability(0)
        } else {
            0
        }
    }

    /// Forward register-0 writes to `write_msix_capability` and report the
    /// resulting `MsixStatus` to the caller.
    fn write_reg(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Box<dyn PciCapConfigWriteResult>> {
        let status = if reg_idx == 0 {
            self.write_msix_capability(offset, data)
        } else {
            MsixStatus::NothingToDo
        };
        Some(Box::new(status))
    }
}
760 
impl AsRawDescriptor for MsixConfig {
    /// Delegates to the MSI device socket's descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}
766 
767 /// Message Control Register
768 //   10-0:  MSI-X Table size
769 //   13-11: Reserved
770 //   14:    Mask. Mask all MSI-X when set.
771 //   15:    Enable. Enable all MSI-X when set.
772 // See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
773 #[bitfield]
774 #[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
775 pub struct MsixCtrl {
776     table_size: B10,
777     reserved: B4,
778     mask: B1,
779     enable: B1,
780 }
781 
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
/// MSI-X Capability Structure
///
/// Laid out with `#[repr(C)]` so the struct can be copied into PCI config
/// space byte-for-byte via `IntoBytes`.
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}
801 
802 impl PciCapability for MsixCap {
bytes(&self) -> &[u8]803     fn bytes(&self) -> &[u8] {
804         self.as_bytes()
805     }
806 
id(&self) -> PciCapabilityID807     fn id(&self) -> PciCapabilityID {
808         PciCapabilityID::Msix
809     }
810 
writable_bits(&self) -> Vec<u32>811     fn writable_bits(&self) -> Vec<u32> {
812         // Only msg_ctl[15:14] is writable
813         vec![0x3000_0000, 0, 0]
814     }
815 }
816 
817 impl MsixCap {
new( table_pci_bar: u8, table_size: u16, table_off: u32, pba_pci_bar: u8, pba_off: u32, ) -> Self818     pub fn new(
819         table_pci_bar: u8,
820         table_size: u16,
821         table_off: u32,
822         pba_pci_bar: u8,
823         pba_off: u32,
824     ) -> Self {
825         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
826 
827         // Set the table size and enable MSI-X.
828         let mut msg_ctl = MsixCtrl::new();
829         msg_ctl.set_enable(1);
830         // Table Size is N - 1 encoded.
831         msg_ctl.set_table_size(table_size - 1);
832 
833         MsixCap {
834             _cap_vndr: 0,
835             _cap_next: 0,
836             msg_ctl,
837             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
838             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
839         }
840     }
841 }
842 
#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    /// Receives the next message on `t`, asserting it is
    /// `AllocateOneMsiAtGsi`, and returns the requested GSI.
    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    /// Captured parameters of an `AddMsiRoute` request, for assertions.
    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    }

    /// Receives the next message on `t`, asserting it is `AddMsiRoute`, and
    /// returns the route parameters.
    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
            },
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    /// Receives the next message on `t`, asserting it is `ReleaseOneIrq`, and
    /// returns the GSI being released.
    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    /// Replies `VmIrqResponse::Ok` on `t`, acknowledging the last request.
    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests. The expected
        // message sequence (allocate + route per vector, each acked) mirrors
        // what restore() must emit; any deviation panics in the helpers.
        let irqchip_fake = thread::spawn(move || {
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        // Restore into a config deliberately constructed with different
        // parameters; restore() must replace them with the snapshotted values.
        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests. Unlike the cold
        // case, restore() on a live config must first release the existing
        // GSIs before re-allocating and re-routing them.
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
}
1022