// Copyright 2019 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::convert::TryInto;

use anyhow::Context;
use base::error;
use base::info;
use base::AsRawDescriptor;
use base::Error as SysError;
use base::Event;
use base::RawDescriptor;
use base::Tube;
use base::TubeError;
use bit_field::*;
use remain::sorted;
use serde::Deserialize;
use serde::Serialize;
use thiserror::Error;
use vm_control::VmIrqRequest;
use vm_control::VmIrqResponse;
use zerocopy::AsBytes;
use zerocopy::FromBytes;

use crate::pci::PciCapability;
use crate::pci::PciCapabilityID;

const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
pub const BITS_PER_PBA_ENTRY: usize = 64;
const FUNCTION_MASK_BIT: u16 = 0x4000;
const MSIX_ENABLE_BIT: u16 = 0x8000;
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;

#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    msg_addr_lo: u32,
    msg_addr_hi: u32,
    msg_data: u32,
    vector_ctl: u32,
}

impl MsixTableEntry {
    fn masked(&self) -> bool {
        self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
    }
}

struct IrqfdGsi {
    irqfd: Event,
    gsi: u32,
}

/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    irq_vec: Vec<Option<IrqfdGsi>>,
    masked: bool,
    enabled: bool,
    msi_device_socket: Tube,
    msix_num: u16,
    pci_id: u32,
    device_name: String,
}

#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    device_name: String,
}

#[sorted]
#[derive(Error, Debug)]
pub enum MsixError {
    #[error("AddMsiRoute failed: {0}")]
    AddMsiRoute(SysError),
    #[error("failed to receive AddMsiRoute response: {0}")]
    AddMsiRouteRecv(TubeError),
    #[error("failed to send AddMsiRoute request: {0}")]
    AddMsiRouteSend(TubeError),
    #[error("AllocateOneMsi failed: {0}")]
    AllocateOneMsi(SysError),
    #[error("failed to receive AllocateOneMsi response: {0}")]
    AllocateOneMsiRecv(TubeError),
    #[error("failed to send AllocateOneMsi request: {0}")]
    AllocateOneMsiSend(TubeError),
    #[error("failed to deserialize snapshot: {0}")]
    DeserializationFailed(serde_json::Error),
    #[error("invalid vector length in snapshot: {0}")]
    InvalidVectorLength(std::num::TryFromIntError),
    #[error("ReleaseOneIrq failed: {0}")]
    ReleaseOneIrq(base::Error),
    #[error("failed to receive ReleaseOneIrq response: {0}")]
    ReleaseOneIrqRecv(TubeError),
    #[error("failed to send ReleaseOneIrq request: {0}")]
    ReleaseOneIrqSend(TubeError),
}

type MsixResult<T> = std::result::Result<T, MsixError>;

pub enum MsixStatus {
    Changed,
    EntryChanged(usize),
    NothingToDo,
}

impl MsixConfig {
    pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let mut table_entries: Vec<MsixTableEntry> = Vec::new();
        table_entries.resize_with(msix_vectors as usize, Default::default);
        table_entries
            .iter_mut()
            .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
        let mut pba_entries: Vec<u64> = Vec::new();
        // One pending bit per vector, rounded up to whole 64-bit PBA entries.
        let num_pba_entries: usize =
            ((msix_vectors as usize) + BITS_PER_PBA_ENTRY - 1) / BITS_PER_PBA_ENTRY;
        pba_entries.resize_with(num_pba_entries, Default::default);

        let mut irq_vec = Vec::new();
        irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);

        MsixConfig {
            table_entries,
            pba_entries,
            irq_vec,
            masked: false,
            enabled: false,
            msi_device_socket: vm_socket,
            msix_num: msix_vectors,
            pci_id,
            device_name,
        }
    }

    /// Get the number of MSI-X vectors in this configuration.
    pub fn num_vectors(&self) -> u16 {
        self.msix_num
    }

    /// Check whether the Function Mask bit in the Message Control word is set.
    /// If 1, all of the vectors associated with the function are masked,
    /// regardless of their per-vector Mask bit states.
    /// If 0, each vector's Mask bit determines whether the vector is masked.
    pub fn masked(&self) -> bool {
        self.masked
    }

    /// Check whether the per-vector Mask bit in the MSI-X table Vector Control
    /// word is set for the given vector.
    /// If true, the vector is masked. If false, the vector is unmasked.
    /// Out-of-range vectors are reported as masked.
    pub fn table_masked(&self, index: usize) -> bool {
        if index >= self.table_entries.len() {
            true
        } else {
            self.table_entries[index].masked()
        }
    }

    /// Check whether the MSI-X Enable bit in the Message Control word is set.
    /// If 1, the function is permitted to use MSI-X to request service.
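    ///
    /// A minimal sketch (hypothetical device setup) of how this bit gets set:
    /// the guest writes the Message Control word at offset 2 of the
    /// capability, and `write_msix_capability` latches the top two bits.
    ///
    /// ```ignore
    /// let reg: u16 = MSIX_ENABLE_BIT; // enable MSI-X, Function Mask clear
    /// msix_config.write_msix_capability(2, &reg.to_le_bytes());
    /// assert!(msix_config.enabled());
    /// ```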
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Read the MSI-X Capability Structure.
    /// The top 2 bits in the Message Control word are emulated and all other
    /// bits are read only.
    pub fn read_msix_capability(&self, data: u32) -> u32 {
        let mut msg_ctl = (data >> 16) as u16;
        msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);

        if self.enabled {
            msg_ctl |= MSIX_ENABLE_BIT;
        }
        if self.masked {
            msg_ctl |= FUNCTION_MASK_BIT;
        }
        (msg_ctl as u32) << 16 | (data & u16::max_value() as u32)
    }

    /// Write to the MSI-X Capability Structure.
    /// Only the top 2 bits in the Message Control word are writable.
    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        if offset == 2 && data.len() == 2 {
            let reg = u16::from_le_bytes([data[0], data[1]]);
            let old_masked = self.masked;
            let old_enabled = self.enabled;

            self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
            self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;

            if !old_enabled && self.enabled {
                if let Err(e) = self.msix_enable_all() {
                    error!("failed to enable MSI-X: {}", e);
                    self.enabled = false;
                }
            }

            // If the Function Mask bit was set and has just been cleared, it's
            // important to go through the entire PBA to check whether there
            // was any pending MSI-X message to inject, given that the vector
            // is not masked.
            if old_masked && !self.masked {
                for (index, entry) in self.table_entries.clone().iter().enumerate() {
                    if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                        self.inject_msix_and_clear_pba(index);
                    }
                }
                return MsixStatus::Changed;
            } else if !old_masked && self.masked {
                return MsixStatus::Changed;
            }
        } else {
            error!(
                "invalid write to MSI-X Capability Structure offset {:x}",
                offset
            );
        }
        MsixStatus::NothingToDo
    }

    /// Create a snapshot of the current MsixConfig struct for use in
    /// snapshotting.
    pub fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
        serde_json::to_value(MsixConfigSnapshot {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
            msix_num: self.msix_num,
            pci_id: self.pci_id,
            device_name: self.device_name.clone(),
            irq_gsi_vec: self
                .irq_vec
                .iter()
                .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi))
                .collect(),
        })
        .context("failed to serialize MsixConfigSnapshot")
    }

    /// Restore a MsixConfig struct based on a snapshot. In short, this will
    /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they
    /// will be re-wired to the irq chip).
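    ///
    /// A minimal sketch of the intended round trip, assuming `msix_config` is
    /// connected to a live irq chip (error handling omitted):
    ///
    /// ```ignore
    /// let snapshot = msix_config.snapshot()?;
    /// // ... later, e.g. after a VM save/resume cycle ...
    /// msix_config.restore(snapshot)?;
    /// ```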
    pub fn restore(&mut self, snapshot: serde_json::Value) -> MsixResult<()> {
        let snapshot: MsixConfigSnapshot =
            serde_json::from_value(snapshot).map_err(MsixError::DeserializationFailed)?;

        self.table_entries = snapshot.table_entries;
        self.pba_entries = snapshot.pba_entries;
        self.masked = snapshot.masked;
        self.enabled = snapshot.enabled;
        self.msix_num = snapshot.msix_num;
        self.pci_id = snapshot.pci_id;
        self.device_name = snapshot.device_name;

        self.msix_release_all()?;
        self.irq_vec
            .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>);
        for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() {
            if let Some(gsi_num) = gsi {
                self.msix_restore_one(vector, *gsi_num)?;
            } else {
                info!(
                    "skipping restore of vector {} for device {}",
                    vector, self.device_name
                );
            }
        }
        Ok(())
    }

    /// Restore the specified MSI-X vector.
    ///
    /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for
    /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must
    /// have passed those checks.
    fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> {
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        let request = VmIrqRequest::AllocateOneMsiAtGsi {
            irqfd,
            gsi,
            device_id: self.pci_id,
            queue_id: index,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            return Err(MsixError::AllocateOneMsi(e));
        };

        self.irq_vec[index] = Some(IrqfdGsi {
            // The irqfd was moved into `request` for the send; take it back
            // out so it can be stored alongside its GSI.
            irqfd: match request {
                VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi,
        });
        self.add_msi_route(index as u16, gsi)?;
        Ok(())
    }

    /// On warm restore, there could already be MSIs registered. We need to
    /// release them in case the routing has changed (e.g. different
    /// data <-> GSI).
    fn msix_release_all(&mut self) -> MsixResult<()> {
        for irqfd_gsi in self.irq_vec.drain(..).flatten() {
            let request = VmIrqRequest::ReleaseOneIrq {
                gsi: irqfd_gsi.gsi,
                irqfd: irqfd_gsi.irqfd,
            };

            self.msi_device_socket
                .send(&request)
                .map_err(MsixError::ReleaseOneIrqSend)?;
            if let VmIrqResponse::Err(e) = self
                .msi_device_socket
                .recv()
                .map_err(MsixError::ReleaseOneIrqRecv)?
            {
                return Err(MsixError::ReleaseOneIrq(e));
            }
        }
        Ok(())
    }

    fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> {
        // Read the vector's Message Address (QWORD at entry offset 0) and
        // Message Data (DWORD at entry offset 8) back out of the MSI-X table.
        let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
        self.read_msix_table((index * 16).into(), data.as_mut());
        let msi_address: u64 = u64::from_le_bytes(data);
        let mut data: [u8; 4] = [0, 0, 0, 0];
        self.read_msix_table((index * 16 + 8).into(), data.as_mut());
        let msi_data: u32 = u32::from_le_bytes(data);

        if msi_address == 0 {
            return Ok(());
        }

        self.msi_device_socket
            .send(&VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            })
            .map_err(MsixError::AddMsiRouteSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AddMsiRouteRecv)?
        {
            return Err(MsixError::AddMsiRoute(e));
        }
        Ok(())
    }

    // Enable MSI-X
    fn msix_enable_all(&mut self) -> MsixResult<()> {
        for index in 0..self.irq_vec.len() {
            self.msix_enable_one(index)?;
        }
        Ok(())
    }

    // Enable one MSI-X vector: create a new irqfd and bind it to a new MSI.
    fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
        if self.irq_vec[index].is_some()
            || !self.enabled()
            || self.masked()
            || self.table_masked(index)
        {
            return Ok(());
        }
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        let request = VmIrqRequest::AllocateOneMsi {
            irqfd,
            device_id: self.pci_id,
            queue_id: index,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        let irq_num: u32 = match self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            VmIrqResponse::AllocateOneMsi { gsi } => gsi,
            VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
            _ => unreachable!(),
        };
        self.irq_vec[index] = Some(IrqfdGsi {
            irqfd: match request {
                VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi: irq_num,
        });

        self.add_msi_route(index as u16, irq_num)?;
        Ok(())
    }

    /// Read MSI-X table
    /// # Arguments
    /// * `offset` - the offset within the MSI-X Table
    /// * `data` - used to store the read results
    ///
    /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use
    /// aligned full DWORD or aligned full QWORD transactions; otherwise, the
    /// result is undefined.
    ///
    /// location: DWORD3         DWORD2    DWORD1          DWORD0
    /// entry 0:  Vector Control Msg Data  Msg Upper Addr  Msg Addr
    /// entry 1:  Vector Control Msg Data  Msg Upper Addr  Msg Addr
    /// entry 2:  Vector Control Msg Data  Msg Upper Addr  Msg Addr
    /// ...
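    ///
    /// A minimal sketch of an aligned DWORD read (hypothetical entry
    /// contents); entry `n` starts at offset `n * MSIX_TABLE_ENTRIES_MODULO`:
    ///
    /// ```ignore
    /// // Read the Vector Control DWORD of entry 1 (offset 1 * 16 + 0xc).
    /// let mut data = [0u8; 4];
    /// msix_config.read_msix_table(0x1c, &mut data);
    /// let vector_ctl = u32::from_le_bytes(data);
    /// ```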
    pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        };
    }

    /// Write to MSI-X table
    ///
    /// Message Address: the contents of this field specify the address
    /// for the memory write transaction; different MSI-X vectors have
    /// different Message Address values.
    /// Message Data: the contents of this field specify the data driven
    /// on AD\[31::00\] during the memory write transaction's data phase.
    /// Vector Control: only bit 0 (Mask Bit) is not reserved; when this bit
    /// is set, the function is prohibited from sending a message using
    /// this MSI-X Table entry.
    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = u32::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => self.table_entries[index].vector_ctl = value,
                    _ => error!("invalid offset"),
                };
            }
            8 => {
                let value = u64::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };
            }
            _ => error!("invalid data length"),
        };

        let new_entry = self.table_entries[index].clone();

        // This MSI-X vector is enabled for the first time.
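        // The irqfd and GSI route are allocated lazily here, on the first
        // unmask while MSI-X is enabled and not function-masked. If the
        // allocation fails, the entry is re-masked so the guest still sees a
        // masked vector.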
        if self.enabled()
            && !self.masked()
            && self.irq_vec[index].is_none()
            && old_entry.masked()
            && !new_entry.masked()
        {
            if let Err(e) = self.msix_enable_one(index) {
                error!("failed to enable MSI-X vector {}: {}", index, e);
                self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
            }
            return MsixStatus::EntryChanged(index);
        }

        if self.enabled()
            && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
                || old_entry.msg_addr_hi != new_entry.msg_addr_hi
                || old_entry.msg_data != new_entry.msg_data)
        {
            if let Some(irqfd_gsi) = &self.irq_vec[index] {
                let irq_num = irqfd_gsi.gsi;
                if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                    error!("add_msi_route failed: {}", e);
                }
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject an MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if the bit has been flipped
        if !self.masked() {
            if old_entry.masked() && !self.table_entries[index].masked() {
                if self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
                return MsixStatus::EntryChanged(index);
            } else if !old_entry.masked() && self.table_entries[index].masked() {
                return MsixStatus::EntryChanged(index);
            }
        }
        MsixStatus::NothingToDo
    }

    /// Read PBA Entries
    /// # Arguments
    /// * `offset` - the offset within the PBA entries
    /// * `data` - used to store the read results
    ///
    /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function
    /// has a pending message for the associated MSI-X Table entry.
    pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        }
    }

    /// Write to PBA Entries
    ///
    /// Software should never write, and should only read Pending Bits.
    /// If software writes to Pending Bits, the result is undefined.
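    ///
    /// This emulation therefore ignores the data and only logs an error.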
    pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    fn set_pba_bit(&mut self, vector: u16, set: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        // Each 64-bit PBA entry holds the pending bits of 64 vectors, e.g.
        // vector 65 maps to bit 1 of pba_entries[1].
        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = (1 << shift) as u64;

        if set {
            self.pba_entries[index] |= mask;
        } else {
            mask = !mask;
            self.pba_entries[index] &= mask;
        }
    }

    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        if let Some(irq) = &self.irq_vec[vector] {
            irq.irqfd.signal().unwrap();
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, false);
    }

    /// Inject virtual interrupt to the guest
    ///
    /// # Arguments
    /// * `vector` - the index to the MSI-X Table entry
    ///
    /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
    /// prohibited from sending the associated message, and the function
    /// must set the associated Pending bit whenever the function would
    /// otherwise send the message. When software unmasks a vector whose
    /// associated Pending bit is set, the function must schedule sending
    /// the associated message, and clear the Pending bit as soon as the
    /// message has been sent.
    ///
    /// If the vector is unmasked, write to the irqfd, which wakes up KVM to
    /// inject a virtual interrupt into the guest.
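    ///
    /// A minimal sketch (hypothetical vector number):
    ///
    /// ```ignore
    /// // Either signals irqfd 0 immediately, or sets Pending bit 0 if the
    /// // vector (or the whole function) is currently masked.
    /// msix_config.trigger(0);
    /// ```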
    pub fn trigger(&mut self, vector: u16) {
        if self.table_entries[vector as usize].masked() || self.masked() {
            self.set_pba_bit(vector, true);
        } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
            irq.irqfd.signal().unwrap();
        }
    }

    /// Return the raw descriptor of the MSI device socket
    pub fn get_msi_socket(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }

    /// Return irqfd of MSI-X Table entry
    ///
    /// # Arguments
    /// * `vector` - the index to the MSI-X table entry
    pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
        match self.irq_vec.get(vector).unwrap_or(&None) {
            Some(irq) => Some(&irq.irqfd),
            None => None,
        }
    }

    pub fn destroy(&mut self) {
        while let Some(irq) = self.irq_vec.pop() {
            if let Some(irq) = irq {
                let request = VmIrqRequest::ReleaseOneIrq {
                    gsi: irq.gsi,
                    irqfd: irq.irqfd,
                };
                if self.msi_device_socket.send(&request).is_err() {
                    continue;
                }
                let _ = self.msi_device_socket.recv::<VmIrqResponse>();
            }
        }
    }
}

impl AsRawDescriptor for MsixConfig {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}

/// Message Control Register
// 10-0:  MSI-X Table size
// 13-11: Reserved
// 14:    Mask. Mask all MSI-X when set.
// 15:    Enable. Enable all MSI-X when set.
// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
#[bitfield]
#[derive(Copy, Clone, Default, AsBytes, FromBytes)]
pub struct MsixCtrl {
    table_size: B10,
    reserved: B4,
    mask: B1,
    enable: B1,
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, AsBytes, FromBytes)]
/// MSI-X Capability Structure
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    // 2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    // 31-3: Table offset in the BAR pointed to by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    // 2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    // 31-3: PBA offset in the BAR pointed to by the BIR.
    pba: u32,
}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::Msix
    }

    fn writable_bits(&self) -> Vec<u32> {
        // Only msg_ctl[15:14] is writable
        vec![0x3000_0000, 0, 0]
    }
}

impl MsixCap {
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        let mut msg_ctl = MsixCtrl::new();
        msg_ctl.set_enable(1);
        // Table Size is N - 1 encoded.
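        // e.g. a device with 8 vectors stores 7; readers of the capability
        // add one back to recover the vector count.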
        msg_ctl.set_table_size(table_size - 1);

        MsixCap {
            _cap_vndr: 0,
            _cap_next: 0,
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }

    #[cfg(unix)]
    pub fn msg_ctl(&self) -> MsixCtrl {
        self.msg_ctl
    }
}

#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    }

    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
            },
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
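        // Only the GSI of each vector is recorded; the irqfds themselves are
        // recreated from scratch when the snapshot is restored.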
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests.
        let irqchip_fake = thread::spawn(move || {
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests.
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
}