• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::convert::TryInto;
6 
7 use anyhow::Context;
8 use base::error;
9 use base::info;
10 use base::AsRawDescriptor;
11 use base::Error as SysError;
12 use base::Event;
13 use base::RawDescriptor;
14 use base::Tube;
15 use base::TubeError;
16 use bit_field::*;
17 use remain::sorted;
18 use serde::Deserialize;
19 use serde::Serialize;
20 use thiserror::Error;
21 use vm_control::VmIrqRequest;
22 use vm_control::VmIrqResponse;
23 use zerocopy::AsBytes;
24 use zerocopy::FromBytes;
25 
26 use crate::pci::PciCapability;
27 use crate::pci::PciCapabilityID;
28 
// Maximum vectors per function; the PCI spec's Table Size field is N - 1
// encoded, giving at most 2048 entries.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Size in bytes of one MSI-X table entry (4 DWORDs).
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// PBA entries are accessed as 8-byte (QWORD) aligned units.
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits packed into a single PBA entry (u64).
pub const BITS_PER_PBA_ENTRY: usize = 64;
// Function Mask bit in the Message Control word (bit 14).
const FUNCTION_MASK_BIT: u16 = 0x4000;
// MSI-X Enable bit in the Message Control word (bit 15).
const MSIX_ENABLE_BIT: u16 = 0x8000;
// Per-vector Mask bit in a table entry's Vector Control DWORD (bit 0).
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;
36 
/// One MSI-X table entry, mirroring the four DWORDs of the hardware layout:
/// message address (low/high), message data, and vector control.
#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    msg_addr_lo: u32,
    msg_addr_hi: u32,
    msg_data: u32,
    vector_ctl: u32,
}
44 
45 impl MsixTableEntry {
masked(&self) -> bool46     fn masked(&self) -> bool {
47         self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
48     }
49 }
50 
/// An allocated interrupt: the irqfd used to signal it and its guest GSI.
struct IrqfdGsi {
    irqfd: Event,
    gsi: u32,
}
55 
/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // One entry per vector: address/data/vector-control as written by the guest.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array; 64 pending bits packed per u64.
    pba_entries: Vec<u64>,
    // Per-vector irqfd + GSI; None until the vector has been enabled.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // Function Mask bit of the Message Control word.
    masked: bool,
    // MSI-X Enable bit of the Message Control word.
    enabled: bool,
    // Tube used to request GSI allocation/routing/release from the VM.
    msi_device_socket: Tube,
    // Total number of vectors this device supports.
    msix_num: u16,
    // Device identifier passed along in irq allocation requests.
    pci_id: u32,
    // Device name passed along in irq allocation requests and logging.
    device_name: String,
}
68 
/// Serializable subset of [MsixConfig] state used by snapshot/restore.
#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    device_name: String,
}
81 
/// Errors that can occur while allocating, routing, or releasing MSI-X
/// interrupts over the VM control socket.
#[sorted]
#[derive(Error, Debug)]
pub enum MsixError {
    #[error("AddMsiRoute failed: {0}")]
    AddMsiRoute(SysError),
    #[error("failed to receive AddMsiRoute response: {0}")]
    AddMsiRouteRecv(TubeError),
    #[error("failed to send AddMsiRoute request: {0}")]
    AddMsiRouteSend(TubeError),
    #[error("AllocateOneMsi failed: {0}")]
    AllocateOneMsi(SysError),
    #[error("failed to receive AllocateOneMsi response: {0}")]
    AllocateOneMsiRecv(TubeError),
    #[error("failed to send AllocateOneMsi request: {0}")]
    AllocateOneMsiSend(TubeError),
    #[error("failed to deserialize snapshot: {0}")]
    DeserializationFailed(serde_json::Error),
    #[error("invalid vector length in snapshot: {0}")]
    InvalidVectorLength(std::num::TryFromIntError),
    #[error("ReleaseOneIrq failed: {0}")]
    ReleaseOneIrq(base::Error),
    #[error("failed to receive ReleaseOneIrq response: {0}")]
    ReleaseOneIrqRecv(TubeError),
    #[error("failed to send ReleaseOneIrq request: {0}")]
    ReleaseOneIrqSend(TubeError),
}
108 
/// Result alias for fallible MSI-X operations.
type MsixResult<T> = std::result::Result<T, MsixError>;

/// Outcome of a guest write to MSI-X registers, reported so the caller can
/// react to mask-state transitions.
pub enum MsixStatus {
    /// The function-wide mask state changed; all vectors are affected.
    Changed,
    /// A single table entry's mask state changed (carries the entry index).
    EntryChanged(usize),
    /// The write did not change any mask state.
    NothingToDo,
}
116 
117 impl MsixConfig {
new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self118     pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
119         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
120 
121         let mut table_entries: Vec<MsixTableEntry> = Vec::new();
122         table_entries.resize_with(msix_vectors as usize, Default::default);
123         table_entries
124             .iter_mut()
125             .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
126         let mut pba_entries: Vec<u64> = Vec::new();
127         let num_pba_entries: usize =
128             ((msix_vectors as usize) + BITS_PER_PBA_ENTRY - 1) / BITS_PER_PBA_ENTRY;
129         pba_entries.resize_with(num_pba_entries, Default::default);
130 
131         let mut irq_vec = Vec::new();
132         irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);
133 
134         MsixConfig {
135             table_entries,
136             pba_entries,
137             irq_vec,
138             masked: false,
139             enabled: false,
140             msi_device_socket: vm_socket,
141             msix_num: msix_vectors,
142             pci_id,
143             device_name,
144         }
145     }
146 
147     /// Get the number of MSI-X vectors in this configuration.
num_vectors(&self) -> u16148     pub fn num_vectors(&self) -> u16 {
149         self.msix_num
150     }
151 
152     /// Check whether the Function Mask bit in Message Control word in set or not.
153     /// if 1, all of the vectors associated with the function are masked,
154     /// regardless of their per-vector Mask bit states.
155     /// If 0, each vector's Mask bit determines whether the vector is masked or not.
masked(&self) -> bool156     pub fn masked(&self) -> bool {
157         self.masked
158     }
159 
160     /// Check whether the Function Mask bit in MSIX table Message Control
161     /// word in set or not.
162     /// If true, the vector is masked.
163     /// If false, the vector is unmasked.
table_masked(&self, index: usize) -> bool164     pub fn table_masked(&self, index: usize) -> bool {
165         if index >= self.table_entries.len() {
166             true
167         } else {
168             self.table_entries[index].masked()
169         }
170     }
171 
172     /// Check whether the MSI-X Enable bit in Message Control word in set or not.
173     /// if 1, the function is permitted to use MSI-X to request service.
enabled(&self) -> bool174     pub fn enabled(&self) -> bool {
175         self.enabled
176     }
177 
178     /// Read the MSI-X Capability Structure.
179     /// The top 2 bits in Message Control word are emulated and all other
180     /// bits are read only.
read_msix_capability(&self, data: u32) -> u32181     pub fn read_msix_capability(&self, data: u32) -> u32 {
182         let mut msg_ctl = (data >> 16) as u16;
183         msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);
184 
185         if self.enabled {
186             msg_ctl |= MSIX_ENABLE_BIT;
187         }
188         if self.masked {
189             msg_ctl |= FUNCTION_MASK_BIT;
190         }
191         (msg_ctl as u32) << 16 | (data & u16::max_value() as u32)
192     }
193 
    /// Write to the MSI-X Capability Structure.
    /// Only the top 2 bits in Message Control Word are writable.
    ///
    /// Returns `MsixStatus::Changed` when the function-wide mask state
    /// changed, `NothingToDo` otherwise. Only a 2-byte write at offset 2
    /// (the Message Control word) is accepted; anything else is logged and
    /// ignored.
    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        if offset == 2 && data.len() == 2 {
            let reg = u16::from_le_bytes([data[0], data[1]]);
            let old_masked = self.masked;
            let old_enabled = self.enabled;

            self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
            self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;

            // First 0 -> 1 transition of the Enable bit: allocate irqfds/GSIs
            // for all eligible vectors. On failure, report MSI-X as disabled.
            if !old_enabled && self.enabled {
                if let Err(e) = self.msix_enable_all() {
                    error!("failed to enable MSI-X: {}", e);
                    self.enabled = false;
                }
            }

            // If the Function Mask bit was set, and has just been cleared, it's
            // important to go through the entire PBA to check if there was any
            // pending MSI-X message to inject, given that the vector is not
            // masked.
            if old_masked && !self.masked {
                // table_entries is cloned because inject_msix_and_clear_pba
                // needs &mut self while we iterate.
                for (index, entry) in self.table_entries.clone().iter().enumerate() {
                    if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                        self.inject_msix_and_clear_pba(index);
                    }
                }
                return MsixStatus::Changed;
            } else if !old_masked && self.masked {
                return MsixStatus::Changed;
            }
        } else {
            error!(
                "invalid write to MSI-X Capability Structure offset {:x}",
                offset
            );
        }
        MsixStatus::NothingToDo
    }
234 
235     /// Create a snapshot of the current MsixConfig struct for use in
236     /// snapshotting.
snapshot(&mut self) -> anyhow::Result<serde_json::Value>237     pub fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
238         serde_json::to_value(MsixConfigSnapshot {
239             table_entries: self.table_entries.clone(),
240             pba_entries: self.pba_entries.clone(),
241             masked: self.masked,
242             enabled: self.enabled,
243             msix_num: self.msix_num,
244             pci_id: self.pci_id,
245             device_name: self.device_name.clone(),
246             irq_gsi_vec: self
247                 .irq_vec
248                 .iter()
249                 .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi))
250                 .collect(),
251         })
252         .context("failed to serialize MsixConfigSnapshot")
253     }
254 
    /// Restore a MsixConfig struct based on a snapshot. In short, this will
    /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they
    /// will be re-wired to the irq chip).
    pub fn restore(&mut self, snapshot: serde_json::Value) -> MsixResult<()> {
        let snapshot: MsixConfigSnapshot =
            serde_json::from_value(snapshot).map_err(MsixError::DeserializationFailed)?;

        self.table_entries = snapshot.table_entries;
        self.pba_entries = snapshot.pba_entries;
        self.masked = snapshot.masked;
        self.enabled = snapshot.enabled;
        self.msix_num = snapshot.msix_num;
        self.pci_id = snapshot.pci_id;
        self.device_name = snapshot.device_name;

        // Release any vectors that already exist (warm restore) before
        // re-allocating, so stale routes can't linger.
        self.msix_release_all()?;
        self.irq_vec
            .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>);
        // Re-create each vector at its snapshotted GSI; vectors that had no
        // irqfd at snapshot time are skipped.
        for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() {
            if let Some(gsi_num) = gsi {
                self.msix_restore_one(vector, *gsi_num)?;
            } else {
                info!(
                    "skipping restore of vector {} for device {}",
                    vector, self.device_name
                );
            }
        }
        Ok(())
    }
285 
286     /// Restore the specified MSI-X vector.
287     ///
288     /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for
289     /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must
290     /// have passed those checks.
msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()>291     fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> {
292         let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
293         let request = VmIrqRequest::AllocateOneMsiAtGsi {
294             irqfd,
295             gsi,
296             device_id: self.pci_id,
297             queue_id: index as usize,
298             device_name: self.device_name.clone(),
299         };
300         self.msi_device_socket
301             .send(&request)
302             .map_err(MsixError::AllocateOneMsiSend)?;
303         if let VmIrqResponse::Err(e) = self
304             .msi_device_socket
305             .recv()
306             .map_err(MsixError::AllocateOneMsiRecv)?
307         {
308             return Err(MsixError::AllocateOneMsi(e));
309         };
310 
311         self.irq_vec[index] = Some(IrqfdGsi {
312             irqfd: match request {
313                 VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd,
314                 _ => unreachable!(),
315             },
316             gsi,
317         });
318         self.add_msi_route(index as u16, gsi)?;
319         Ok(())
320     }
321 
322     /// On warm restore, there could already be MSIs registered. We need to
323     /// release them in case the routing has changed (e.g. different
324     /// data <-> GSI).
msix_release_all(&mut self) -> MsixResult<()>325     fn msix_release_all(&mut self) -> MsixResult<()> {
326         for irqfd_gsi in self.irq_vec.drain(..).flatten() {
327             let request = VmIrqRequest::ReleaseOneIrq {
328                 gsi: irqfd_gsi.gsi,
329                 irqfd: irqfd_gsi.irqfd,
330             };
331 
332             self.msi_device_socket
333                 .send(&request)
334                 .map_err(MsixError::ReleaseOneIrqSend)?;
335             if let VmIrqResponse::Err(e) = self
336                 .msi_device_socket
337                 .recv()
338                 .map_err(MsixError::ReleaseOneIrqRecv)?
339             {
340                 return Err(MsixError::ReleaseOneIrq(e));
341             }
342         }
343         Ok(())
344     }
345 
add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()>346     fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> {
347         let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
348         self.read_msix_table((index * 16).into(), data.as_mut());
349         let msi_address: u64 = u64::from_le_bytes(data);
350         let mut data: [u8; 4] = [0, 0, 0, 0];
351         self.read_msix_table((index * 16 + 8).into(), data.as_mut());
352         let msi_data: u32 = u32::from_le_bytes(data);
353 
354         if msi_address == 0 {
355             return Ok(());
356         }
357 
358         self.msi_device_socket
359             .send(&VmIrqRequest::AddMsiRoute {
360                 gsi,
361                 msi_address,
362                 msi_data,
363             })
364             .map_err(MsixError::AddMsiRouteSend)?;
365         if let VmIrqResponse::Err(e) = self
366             .msi_device_socket
367             .recv()
368             .map_err(MsixError::AddMsiRouteRecv)?
369         {
370             return Err(MsixError::AddMsiRoute(e));
371         }
372         Ok(())
373     }
374 
375     // Enable MSI-X
msix_enable_all(&mut self) -> MsixResult<()>376     fn msix_enable_all(&mut self) -> MsixResult<()> {
377         for index in 0..self.irq_vec.len() {
378             self.msix_enable_one(index)?;
379         }
380         Ok(())
381     }
382 
383     // Use a new MSI-X vector
384     // Create a new eventfd and bind them to a new msi
msix_enable_one(&mut self, index: usize) -> MsixResult<()>385     fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
386         if self.irq_vec[index].is_some()
387             || !self.enabled()
388             || self.masked()
389             || self.table_masked(index)
390         {
391             return Ok(());
392         }
393         let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
394         let request = VmIrqRequest::AllocateOneMsi {
395             irqfd,
396             device_id: self.pci_id,
397             queue_id: index as usize,
398             device_name: self.device_name.clone(),
399         };
400         self.msi_device_socket
401             .send(&request)
402             .map_err(MsixError::AllocateOneMsiSend)?;
403         let irq_num: u32 = match self
404             .msi_device_socket
405             .recv()
406             .map_err(MsixError::AllocateOneMsiRecv)?
407         {
408             VmIrqResponse::AllocateOneMsi { gsi } => gsi,
409             VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
410             _ => unreachable!(),
411         };
412         self.irq_vec[index] = Some(IrqfdGsi {
413             irqfd: match request {
414                 VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
415                 _ => unreachable!(),
416             },
417             gsi: irq_num,
418         });
419 
420         self.add_msi_route(index as u16, irq_num)?;
421         Ok(())
422     }
423 
424     /// Read MSI-X table
425     ///  # Arguments
426     ///  * 'offset' - the offset within the MSI-X Table
427     ///  * 'data' - used to store the read results
428     ///
429     /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full
430     /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined.
431     ///
432     ///   location: DWORD3            DWORD2      DWORD1            DWORD0
433     ///   entry 0:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
434     ///   entry 1:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
435     ///   entry 2:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
436     ///   ...
read_msix_table(&self, offset: u64, data: &mut [u8])437     pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
438         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
439         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
440 
441         match data.len() {
442             4 => {
443                 let value = match modulo_offset {
444                     0x0 => self.table_entries[index].msg_addr_lo,
445                     0x4 => self.table_entries[index].msg_addr_hi,
446                     0x8 => self.table_entries[index].msg_data,
447                     0xc => self.table_entries[index].vector_ctl,
448                     _ => {
449                         error!("invalid offset");
450                         0
451                     }
452                 };
453 
454                 data.copy_from_slice(&value.to_le_bytes());
455             }
456             8 => {
457                 let value = match modulo_offset {
458                     0x0 => {
459                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
460                             | u64::from(self.table_entries[index].msg_addr_lo)
461                     }
462                     0x8 => {
463                         (u64::from(self.table_entries[index].vector_ctl) << 32)
464                             | u64::from(self.table_entries[index].msg_data)
465                     }
466                     _ => {
467                         error!("invalid offset");
468                         0
469                     }
470                 };
471 
472                 data.copy_from_slice(&value.to_le_bytes());
473             }
474             _ => error!("invalid data length"),
475         };
476     }
477 
    /// Write to MSI-X table
    ///
    /// Message Address: the contents of this field specifies the address
    ///     for the memory write transaction; different MSI-X vectors have
    ///     different Message Address values
    /// Message Data: the contents of this field specifies the data driven
    ///     on AD\[31::00\] during the memory write transaction's data phase.
    /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
    ///     is set, the function is prohibited from sending a message using
    ///     this MSI-X Table entry.
    ///
    /// Returns `MsixStatus::EntryChanged(index)` when the written entry's
    /// mask state changed, `NothingToDo` otherwise.
    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            // Aligned DWORD write: one field of the entry.
            4 => {
                let value = u32::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => self.table_entries[index].vector_ctl = value,
                    _ => error!("invalid offset"),
                };
            }
            // Aligned QWORD write: two adjacent fields of the entry.
            8 => {
                let value = u64::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };
            }
            _ => error!("invalid data length"),
        };

        let new_entry = self.table_entries[index].clone();

        // This MSI-X vector is enabled for the first time.
        if self.enabled()
            && !self.masked()
            && self.irq_vec[index].is_none()
            && old_entry.masked()
            && !new_entry.masked()
        {
            // On failure, re-mask the entry so we don't pretend it can fire.
            if let Err(e) = self.msix_enable_one(index) {
                error!("failed to enable MSI-X vector {}: {}", index, e);
                self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
            }
            return MsixStatus::EntryChanged(index);
        }

        // Address or data changed on an already-enabled vector: refresh the
        // MSI route so the irq chip delivers to the new target.
        if self.enabled()
            && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
                || old_entry.msg_addr_hi != new_entry.msg_addr_hi
                || old_entry.msg_data != new_entry.msg_data)
        {
            if let Some(irqfd_gsi) = &self.irq_vec[index] {
                let irq_num = irqfd_gsi.gsi;
                if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                    error!("add_msi_route failed: {}", e);
                }
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked() {
            if old_entry.masked() && !self.table_entries[index].masked() {
                if self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
                return MsixStatus::EntryChanged(index);
            } else if !old_entry.masked() && self.table_entries[index].masked() {
                return MsixStatus::EntryChanged(index);
            }
        }
        MsixStatus::NothingToDo
    }
573 
574     /// Read PBA Entries
575     ///  # Arguments
576     ///  * 'offset' - the offset within the PBA entries
577     ///  * 'data' - used to store the read results
578     ///
579     /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function
580     /// has a pending message for the associated MSI-X Table entry.
read_pba_entries(&self, offset: u64, data: &mut [u8])581     pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
582         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
583         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
584 
585         match data.len() {
586             4 => {
587                 let value: u32 = match modulo_offset {
588                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
589                     0x4 => (self.pba_entries[index] >> 32) as u32,
590                     _ => {
591                         error!("invalid offset");
592                         0
593                     }
594                 };
595 
596                 data.copy_from_slice(&value.to_le_bytes());
597             }
598             8 => {
599                 let value: u64 = match modulo_offset {
600                     0x0 => self.pba_entries[index],
601                     _ => {
602                         error!("invalid offset");
603                         0
604                     }
605                 };
606 
607                 data.copy_from_slice(&value.to_le_bytes());
608             }
609             _ => error!("invalid data length"),
610         }
611     }
612 
    /// Write to PBA Entries
    ///
    /// Software should never write, and should only read Pending Bits.
    /// If software writes to Pending Bits, the result is undefined.
    pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
        // The PBA is read-only from the guest's perspective; log and drop.
        error!("Pending Bit Array is read only");
    }
620 
set_pba_bit(&mut self, vector: u16, set: bool)621     fn set_pba_bit(&mut self, vector: u16, set: bool) {
622         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
623 
624         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
625         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
626         let mut mask: u64 = (1 << shift) as u64;
627 
628         if set {
629             self.pba_entries[index] |= mask;
630         } else {
631             mask = !mask;
632             self.pba_entries[index] &= mask;
633         }
634     }
635 
get_pba_bit(&self, vector: u16) -> u8636     fn get_pba_bit(&self, vector: u16) -> u8 {
637         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
638 
639         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
640         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
641 
642         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
643     }
644 
inject_msix_and_clear_pba(&mut self, vector: usize)645     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
646         if let Some(irq) = &self.irq_vec[vector] {
647             irq.irqfd.signal().unwrap();
648         }
649 
650         // Clear the bit from PBA
651         self.set_pba_bit(vector as u16, false);
652     }
653 
654     /// Inject virtual interrupt to the guest
655     ///
656     ///  # Arguments
657     ///  * 'vector' - the index to the MSI-X Table entry
658     ///
659     /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
660     /// prohibited from sending the associated message, and the function
661     /// must set the associated Pending bit whenever the function would
662     /// otherwise send the message. When software unmasks a vector whose
663     /// associated Pending bit is set, the function must schedule sending
664     /// the associated message, and clear the Pending bit as soon as the
665     /// message has been sent.
666     ///
667     /// If the vector is unmasked, writing to irqfd which wakes up KVM to
668     /// inject virtual interrupt to the guest.
trigger(&mut self, vector: u16)669     pub fn trigger(&mut self, vector: u16) {
670         if self.table_entries[vector as usize].masked() || self.masked() {
671             self.set_pba_bit(vector, true);
672         } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
673             irq.irqfd.signal().unwrap();
674         }
675     }
676 
677     /// Return the raw descriptor of the MSI device socket
get_msi_socket(&self) -> RawDescriptor678     pub fn get_msi_socket(&self) -> RawDescriptor {
679         self.msi_device_socket.as_raw_descriptor()
680     }
681 
682     /// Return irqfd of MSI-X Table entry
683     ///
684     ///  # Arguments
685     ///  * 'vector' - the index to the MSI-X table entry
get_irqfd(&self, vector: usize) -> Option<&Event>686     pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
687         match self.irq_vec.get(vector as usize).unwrap_or(&None) {
688             Some(irq) => Some(&irq.irqfd),
689             None => None,
690         }
691     }
692 
destroy(&mut self)693     pub fn destroy(&mut self) {
694         while let Some(irq) = self.irq_vec.pop() {
695             if let Some(irq) = irq {
696                 let request = VmIrqRequest::ReleaseOneIrq {
697                     gsi: irq.gsi,
698                     irqfd: irq.irqfd,
699                 };
700                 if self.msi_device_socket.send(&request).is_err() {
701                     continue;
702                 }
703                 let _ = self.msi_device_socket.recv::<VmIrqResponse>();
704             }
705         }
706     }
707 }
708 
impl AsRawDescriptor for MsixConfig {
    // Delegates to the underlying MSI device socket's descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}
714 
715 /// Message Control Register
716 //   10-0:  MSI-X Table size
717 //   13-11: Reserved
718 //   14:    Mask. Mask all MSI-X when set.
719 //   15:    Enable. Enable all MSI-X when set.
720 // See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
721 #[bitfield]
722 #[derive(Copy, Clone, Default, AsBytes, FromBytes)]
723 pub struct MsixCtrl {
724     table_size: B10,
725     reserved: B4,
726     mask: B1,
727     enable: B1,
728 }
729 
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, AsBytes, FromBytes)]
/// MSI-X Capability Structure, laid out exactly as it appears in PCI
/// configuration space (hence `#[repr(C)]` and the byte-level derives).
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}
749 
impl PciCapability for MsixCap {
    // Raw bytes of the capability as exposed in config space.
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::Msix
    }

    fn writable_bits(&self) -> Vec<u32> {
        // Only msg_ctl[15:14] is writable
        // NOTE(review): 0x3000_0000 marks dword bits 29:28, but msg_ctl sits
        // in bytes 2-3 of the first dword, so msg_ctl[15:14] would be dword
        // bits 31:30 (0xc000_0000) — verify against the bit-ordering
        // convention PciCapability::writable_bits consumers expect.
        vec![0x3000_0000, 0, 0]
    }
}
764 
impl MsixCap {
    /// Builds an MSI-X capability with the Enable bit already set.
    ///
    ///  # Arguments
    ///  * 'table_pci_bar' - BAR index (low 3 bits used) holding the table
    ///  * 'table_size' - number of vectors; must be below
    ///    MAX_MSIX_VECTORS_PER_DEVICE. NOTE(review): a value of 0 would make
    ///    `table_size - 1` underflow below — callers presumably always pass
    ///    at least 1; confirm.
    ///  * 'table_off' - table offset within its BAR (low 3 bits dropped)
    ///  * 'pba_pci_bar' - BAR index (low 3 bits used) holding the PBA
    ///  * 'pba_off' - PBA offset within its BAR (low 3 bits dropped)
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        let mut msg_ctl = MsixCtrl::new();
        msg_ctl.set_enable(1);
        // Table Size is N - 1 encoded.
        msg_ctl.set_table_size(table_size - 1);

        MsixCap {
            _cap_vndr: 0,
            _cap_next: 0,
            msg_ctl,
            // Low 3 bits carry the BAR indicator; the rest is the offset.
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }

    #[cfg(unix)]
    pub fn msg_ctl(&self) -> MsixCtrl {
        self.msg_ctl
    }
}
795 
#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    /// Receives one message on `t`, which must be `AllocateOneMsiAtGsi`, and
    /// returns the GSI being allocated. Panics on any other message.
    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    // Plain-data copy of the fields of a VmIrqRequest::AddMsiRoute message.
    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    }

    /// Receives one message on `t`, which must be `AddMsiRoute`, and returns
    /// its fields. Panics on any other message.
    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
            },
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    /// Receives one message on `t`, which must be `ReleaseOneIrq`, and
    /// returns the GSI being released. Panics on any other message.
    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    /// Replies `VmIrqResponse::Ok` to the last request received on `t`.
    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        // (the expected message order is the contract under test: allocate
        // GSI, ack, add route, ack — once per vector, in vector order).
        let irqchip_fake = thread::spawn(move || {
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        // Restore into a config deliberately created with different
        // parameters (vector count, pci_id, device name) to verify restore
        // overwrites them from the snapshot.
        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        // (warm restore first releases the live GSIs, then re-runs the
        // allocate/add-route sequence exactly as in the cold case).
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
}
975