1 // Copyright 2019 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 use crate::pci::{PciCapability, PciCapabilityID}; 6 use base::{error, AsRawDescriptor, Error as SysError, Event, RawDescriptor, Tube, TubeError}; 7 8 use std::convert::TryInto; 9 use std::fmt::{self, Display}; 10 use vm_control::{VmIrqRequest, VmIrqResponse}; 11 12 use data_model::DataInit; 13 14 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; 15 pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; 16 pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8; 17 pub const BITS_PER_PBA_ENTRY: usize = 64; 18 const FUNCTION_MASK_BIT: u16 = 0x4000; 19 const MSIX_ENABLE_BIT: u16 = 0x8000; 20 21 #[derive(Clone)] 22 struct MsixTableEntry { 23 msg_addr_lo: u32, 24 msg_addr_hi: u32, 25 msg_data: u32, 26 vector_ctl: u32, 27 } 28 29 impl MsixTableEntry { masked(&self) -> bool30 fn masked(&self) -> bool { 31 self.vector_ctl & 0x1 == 0x1 32 } 33 } 34 35 impl Default for MsixTableEntry { default() -> Self36 fn default() -> Self { 37 MsixTableEntry { 38 msg_addr_lo: 0, 39 msg_addr_hi: 0, 40 msg_data: 0, 41 vector_ctl: 0, 42 } 43 } 44 } 45 46 struct IrqfdGsi { 47 irqfd: Event, 48 gsi: u32, 49 } 50 51 /// Wrapper over MSI-X Capability Structure and MSI-X Tables 52 pub struct MsixConfig { 53 table_entries: Vec<MsixTableEntry>, 54 pba_entries: Vec<u64>, 55 irq_vec: Vec<IrqfdGsi>, 56 masked: bool, 57 enabled: bool, 58 msi_device_socket: Tube, 59 msix_num: u16, 60 } 61 62 enum MsixError { 63 AddMsiRoute(SysError), 64 AddMsiRouteRecv(TubeError), 65 AddMsiRouteSend(TubeError), 66 AllocateOneMsi(SysError), 67 AllocateOneMsiRecv(TubeError), 68 AllocateOneMsiSend(TubeError), 69 } 70 71 impl Display for MsixError { 72 #[remain::check] fmt(&self, f: &mut fmt::Formatter) -> fmt::Result73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 74 use self::MsixError::*; 75 76 #[sorted] 77 match self { 78 AddMsiRoute(e) => write!(f, "AddMsiRoute failed: {}", e), 79 AddMsiRouteRecv(e) => write!(f, "failed to receive AddMsiRoute response: {}", e), 80 AddMsiRouteSend(e) => write!(f, "failed to send AddMsiRoute request: {}", e), 81 AllocateOneMsi(e) => write!(f, "AllocateOneMsi failed: {}", e), 82 AllocateOneMsiRecv(e) => write!(f, "failed to receive AllocateOneMsi response: {}", e), 83 AllocateOneMsiSend(e) => write!(f, "failed to send AllocateOneMsi request: {}", e), 84 } 85 } 86 } 87 88 type MsixResult<T> = std::result::Result<T, MsixError>; 89 90 pub enum MsixStatus { 91 Changed, 92 EntryChanged(usize), 93 NothingToDo, 94 } 95 96 impl MsixConfig { new(msix_vectors: u16, vm_socket: Tube) -> Self97 pub fn new(msix_vectors: u16, vm_socket: Tube) -> Self { 98 assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); 99 100 let mut table_entries: Vec<MsixTableEntry> = Vec::new(); 101 table_entries.resize_with(msix_vectors as usize, Default::default); 102 let mut pba_entries: Vec<u64> = Vec::new(); 103 let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1; 104 pba_entries.resize_with(num_pba_entries, Default::default); 105 106 MsixConfig { 107 table_entries, 108 pba_entries, 109 irq_vec: Vec::new(), 110 masked: false, 111 enabled: false, 112 msi_device_socket: vm_socket, 113 msix_num: msix_vectors, 114 } 115 } 116 117 /// Get the number of MSI-X vectors in this configuration. num_vectors(&self) -> u16118 pub fn num_vectors(&self) -> u16 { 119 self.msix_num 120 } 121 122 /// Check whether the Function Mask bit in Message Control word in set or not. 123 /// if 1, all of the vectors associated with the function are masked, 124 /// regardless of their per-vector Mask bit states. 125 /// If 0, each vector's Mask bit determines whether the vector is masked or not. masked(&self) -> bool126 pub fn masked(&self) -> bool { 127 self.masked 128 } 129 130 /// Check whether the Function Mask bit in MSIX table Message Control 131 /// word in set or not. 132 /// If true, the vector is masked. 133 /// If false, the vector is unmasked. table_masked(&self, index: usize) -> bool134 pub fn table_masked(&self, index: usize) -> bool { 135 if index >= self.table_entries.len() { 136 true 137 } else { 138 self.table_entries[index].masked() 139 } 140 } 141 142 /// Check whether the MSI-X Enable bit in Message Control word in set or not. 143 /// if 1, the function is permitted to use MSI-X to request service. enabled(&self) -> bool144 pub fn enabled(&self) -> bool { 145 self.enabled 146 } 147 148 /// Read the MSI-X Capability Structure. 149 /// The top 2 bits in Message Control word are emulated and all other 150 /// bits are read only. read_msix_capability(&self, data: u32) -> u32151 pub fn read_msix_capability(&self, data: u32) -> u32 { 152 let mut msg_ctl = (data >> 16) as u16; 153 msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT); 154 155 if self.enabled { 156 msg_ctl |= MSIX_ENABLE_BIT; 157 } 158 if self.masked { 159 msg_ctl |= FUNCTION_MASK_BIT; 160 } 161 (msg_ctl as u32) << 16 | (data & u16::max_value() as u32) 162 } 163 164 /// Write to the MSI-X Capability Structure. 165 /// Only the top 2 bits in Message Control Word are writable. write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus166 pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus { 167 if offset == 2 && data.len() == 2 { 168 let reg = u16::from_le_bytes([data[0], data[1]]); 169 let old_masked = self.masked; 170 let old_enabled = self.enabled; 171 172 self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT; 173 self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT; 174 175 if !old_enabled && self.enabled { 176 if let Err(e) = self.msix_enable() { 177 error!("failed to enable MSI-X: {}", e); 178 self.enabled = false; 179 } 180 } 181 182 // If the Function Mask bit was set, and has just been cleared, it's 183 // important to go through the entire PBA to check if there was any 184 // pending MSI-X message to inject, given that the vector is not 185 // masked. 186 if old_masked && !self.masked { 187 for (index, entry) in self.table_entries.clone().iter().enumerate() { 188 if !entry.masked() && self.get_pba_bit(index as u16) == 1 { 189 self.inject_msix_and_clear_pba(index); 190 } 191 } 192 return MsixStatus::Changed; 193 } else if !old_masked && self.masked { 194 return MsixStatus::Changed; 195 } 196 } else { 197 error!( 198 "invalid write to MSI-X Capability Structure offset {:x}", 199 offset 200 ); 201 } 202 MsixStatus::NothingToDo 203 } 204 add_msi_route(&self, index: u16, gsi: u32) -> MsixResult<()>205 fn add_msi_route(&self, index: u16, gsi: u32) -> MsixResult<()> { 206 let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; 207 self.read_msix_table((index * 16).into(), data.as_mut()); 208 let msi_address: u64 = u64::from_le_bytes(data); 209 let mut data: [u8; 4] = [0, 0, 0, 0]; 210 self.read_msix_table((index * 16 + 8).into(), data.as_mut()); 211 let msi_data: u32 = u32::from_le_bytes(data); 212 213 if msi_address == 0 { 214 return Ok(()); 215 } 216 217 self.msi_device_socket 218 .send(&VmIrqRequest::AddMsiRoute { 219 gsi, 220 msi_address, 221 msi_data, 222 }) 223 .map_err(MsixError::AddMsiRouteSend)?; 224 if let VmIrqResponse::Err(e) = self 225 .msi_device_socket 226 .recv() 227 .map_err(MsixError::AddMsiRouteRecv)? 228 { 229 return Err(MsixError::AddMsiRoute(e)); 230 } 231 Ok(()) 232 } 233 msix_enable(&mut self) -> MsixResult<()>234 fn msix_enable(&mut self) -> MsixResult<()> { 235 self.irq_vec.clear(); 236 for i in 0..self.msix_num { 237 let irqfd = Event::new().unwrap(); 238 let request = VmIrqRequest::AllocateOneMsi { irqfd }; 239 self.msi_device_socket 240 .send(&request) 241 .map_err(MsixError::AllocateOneMsiSend)?; 242 let irq_num: u32; 243 match self 244 .msi_device_socket 245 .recv() 246 .map_err(MsixError::AllocateOneMsiRecv)? 247 { 248 VmIrqResponse::AllocateOneMsi { gsi } => irq_num = gsi, 249 VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)), 250 _ => unreachable!(), 251 } 252 self.irq_vec.push(IrqfdGsi { 253 irqfd: match request { 254 VmIrqRequest::AllocateOneMsi { irqfd } => irqfd, 255 _ => unreachable!(), 256 }, 257 gsi: irq_num, 258 }); 259 260 self.add_msi_route(i, irq_num)?; 261 } 262 Ok(()) 263 } 264 265 /// Read MSI-X table 266 /// # Arguments 267 /// * 'offset' - the offset within the MSI-X Table 268 /// * 'data' - used to store the read results 269 /// 270 /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full 271 /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined. 272 /// 273 /// location: DWORD3 DWORD2 DWORD1 DWORD0 274 /// entry 0: Vector Control Msg Data Msg Upper Addr Msg Addr 275 /// entry 1: Vector Control Msg Data Msg Upper Addr Msg Addr 276 /// entry 2: Vector Control Msg Data Msg Upper Addr Msg Addr 277 /// ... read_msix_table(&self, offset: u64, data: &mut [u8])278 pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) { 279 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 280 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 281 282 match data.len() { 283 4 => { 284 let value = match modulo_offset { 285 0x0 => self.table_entries[index].msg_addr_lo, 286 0x4 => self.table_entries[index].msg_addr_hi, 287 0x8 => self.table_entries[index].msg_data, 288 0xc => self.table_entries[index].vector_ctl, 289 _ => { 290 error!("invalid offset"); 291 0 292 } 293 }; 294 295 data.copy_from_slice(&value.to_le_bytes()); 296 } 297 8 => { 298 let value = match modulo_offset { 299 0x0 => { 300 (u64::from(self.table_entries[index].msg_addr_hi) << 32) 301 | u64::from(self.table_entries[index].msg_addr_lo) 302 } 303 0x8 => { 304 (u64::from(self.table_entries[index].vector_ctl) << 32) 305 | u64::from(self.table_entries[index].msg_data) 306 } 307 _ => { 308 error!("invalid offset"); 309 0 310 } 311 }; 312 313 data.copy_from_slice(&value.to_le_bytes()); 314 } 315 _ => error!("invalid data length"), 316 }; 317 } 318 319 /// Write to MSI-X table 320 /// 321 /// Message Address: the contents of this field specifies the address 322 /// for the memory write transaction; different MSI-X vectors have 323 /// different Message Address values 324 /// Message Data: the contents of this field specifies the data driven 325 /// on AD[31::00] during the memory write transaction's data phase. 326 /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit 327 /// is set, the function is prohibited from sending a message using 328 /// this MSI-X Table entry. write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus329 pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus { 330 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 331 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 332 333 // Store the value of the entry before modification 334 let old_entry = self.table_entries[index].clone(); 335 336 match data.len() { 337 4 => { 338 let value = u32::from_le_bytes(data.try_into().unwrap()); 339 match modulo_offset { 340 0x0 => self.table_entries[index].msg_addr_lo = value, 341 0x4 => self.table_entries[index].msg_addr_hi = value, 342 0x8 => self.table_entries[index].msg_data = value, 343 0xc => self.table_entries[index].vector_ctl = value, 344 _ => error!("invalid offset"), 345 }; 346 } 347 8 => { 348 let value = u64::from_le_bytes(data.try_into().unwrap()); 349 match modulo_offset { 350 0x0 => { 351 self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32; 352 self.table_entries[index].msg_addr_hi = (value >> 32) as u32; 353 } 354 0x8 => { 355 self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; 356 self.table_entries[index].vector_ctl = (value >> 32) as u32; 357 } 358 _ => error!("invalid offset"), 359 }; 360 } 361 _ => error!("invalid data length"), 362 }; 363 364 let new_entry = self.table_entries[index].clone(); 365 if self.enabled() 366 && (old_entry.msg_addr_lo != new_entry.msg_addr_lo 367 || old_entry.msg_addr_hi != new_entry.msg_addr_hi 368 || old_entry.msg_data != new_entry.msg_data) 369 { 370 let irq_num = self.irq_vec[index].gsi; 371 if let Err(e) = self.add_msi_route(index as u16, irq_num) { 372 error!("add_msi_route failed: {}", e); 373 } 374 } 375 376 // After the MSI-X table entry has been updated, it is necessary to 377 // check if the vector control masking bit has changed. In case the 378 // bit has been flipped from 1 to 0, we need to inject a MSI message 379 // if the corresponding pending bit from the PBA is set. Once the MSI 380 // has been injected, the pending bit in the PBA needs to be cleared. 381 // All of this is valid only if MSI-X has not been masked for the whole 382 // device. 383 384 // Check if bit has been flipped 385 if !self.masked() { 386 if old_entry.masked() && !self.table_entries[index].masked() { 387 if self.get_pba_bit(index as u16) == 1 { 388 self.inject_msix_and_clear_pba(index); 389 } 390 return MsixStatus::EntryChanged(index); 391 } else if !old_entry.masked() && self.table_entries[index].masked() { 392 return MsixStatus::EntryChanged(index); 393 } 394 } 395 MsixStatus::NothingToDo 396 } 397 398 /// Read PBA Entries 399 /// # Arguments 400 /// * 'offset' - the offset within the PBA entries 401 /// * 'data' - used to store the read results 402 /// 403 /// Pending Bits[63::00]: For each Pending Bit that is set, the function 404 /// has a pending message for the associated MSI-X Table entry. read_pba_entries(&self, offset: u64, data: &mut [u8])405 pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) { 406 let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; 407 let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; 408 409 match data.len() { 410 4 => { 411 let value: u32 = match modulo_offset { 412 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 413 0x4 => (self.pba_entries[index] >> 32) as u32, 414 _ => { 415 error!("invalid offset"); 416 0 417 } 418 }; 419 420 data.copy_from_slice(&value.to_le_bytes()); 421 } 422 8 => { 423 let value: u64 = match modulo_offset { 424 0x0 => self.pba_entries[index], 425 _ => { 426 error!("invalid offset"); 427 0 428 } 429 }; 430 431 data.copy_from_slice(&value.to_le_bytes()); 432 } 433 _ => error!("invalid data length"), 434 } 435 } 436 437 /// Write to PBA Entries 438 /// 439 /// Software should never write, and should only read Pending Bits. 440 /// If software writes to Pending Bits, the result is undefined. write_pba_entries(&mut self, _offset: u64, _data: &[u8])441 pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) { 442 error!("Pending Bit Array is read only"); 443 } 444 set_pba_bit(&mut self, vector: u16, set: bool)445 fn set_pba_bit(&mut self, vector: u16, set: bool) { 446 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 447 448 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 449 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 450 let mut mask: u64 = (1 << shift) as u64; 451 452 if set { 453 self.pba_entries[index] |= mask; 454 } else { 455 mask = !mask; 456 self.pba_entries[index] &= mask; 457 } 458 } 459 get_pba_bit(&self, vector: u16) -> u8460 fn get_pba_bit(&self, vector: u16) -> u8 { 461 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 462 463 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 464 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 465 466 ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 467 } 468 inject_msix_and_clear_pba(&mut self, vector: usize)469 fn inject_msix_and_clear_pba(&mut self, vector: usize) { 470 if let Some(irq) = self.irq_vec.get(vector) { 471 irq.irqfd.write(1).unwrap(); 472 } 473 474 // Clear the bit from PBA 475 self.set_pba_bit(vector as u16, false); 476 } 477 478 /// Inject virtual interrupt to the guest 479 /// 480 /// # Arguments 481 /// * 'vector' - the index to the MSI-X Table entry 482 /// 483 /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is 484 /// prohibited from sending the associated message, and the function 485 /// must set the associated Pending bit whenever the function would 486 /// otherwise send the message. When software unmasks a vector whose 487 /// associated Pending bit is set, the function must schedule sending 488 /// the associated message, and clear the Pending bit as soon as the 489 /// message has been sent. 490 /// 491 /// If the vector is unmasked, writing to irqfd which wakes up KVM to 492 /// inject virtual interrupt to the guest. trigger(&mut self, vector: u16)493 pub fn trigger(&mut self, vector: u16) { 494 if self.table_entries[vector as usize].masked() || self.masked() { 495 self.set_pba_bit(vector, true); 496 } else if let Some(irq) = self.irq_vec.get(vector as usize) { 497 irq.irqfd.write(1).unwrap(); 498 } 499 } 500 501 /// Return the raw fd of the MSI device socket get_msi_socket(&self) -> RawDescriptor502 pub fn get_msi_socket(&self) -> RawDescriptor { 503 self.msi_device_socket.as_raw_descriptor() 504 } 505 506 /// Return irqfd of MSI-X Table entry 507 /// 508 /// # Arguments 509 /// * 'vector' - the index to the MSI-X table entry get_irqfd(&self, vector: usize) -> Option<&Event>510 pub fn get_irqfd(&self, vector: usize) -> Option<&Event> { 511 match self.irq_vec.get(vector) { 512 Some(irq) => Some(&irq.irqfd), 513 None => None, 514 } 515 } 516 } 517 518 impl AsRawDescriptor for MsixConfig { as_raw_descriptor(&self) -> RawDescriptor519 fn as_raw_descriptor(&self) -> RawDescriptor { 520 self.msi_device_socket.as_raw_descriptor() 521 } 522 } 523 524 // It is safe to implement DataInit; all members are simple numbers and any value is valid. 525 unsafe impl DataInit for MsixCap {} 526 527 #[allow(dead_code)] 528 #[repr(C)] 529 #[derive(Clone, Copy, Default)] 530 /// MSI-X Capability Structure 531 pub struct MsixCap { 532 // To make add_capability() happy 533 _cap_vndr: u8, 534 _cap_next: u8, 535 // Message Control Register 536 // 10-0: MSI-X Table size 537 // 13-11: Reserved 538 // 14: Mask. Mask all MSI-X when set. 539 // 15: Enable. Enable all MSI-X when set. 540 msg_ctl: u16, 541 // Table. Contains the offset and the BAR indicator (BIR) 542 // 2-0: Table BAR indicator (BIR). Can be 0 to 5. 543 // 31-3: Table offset in the BAR pointed by the BIR. 544 table: u32, 545 // Pending Bit Array. Contains the offset and the BAR indicator (BIR) 546 // 2-0: PBA BAR indicator (BIR). Can be 0 to 5. 547 // 31-3: PBA offset in the BAR pointed by the BIR. 548 pba: u32, 549 } 550 551 impl PciCapability for MsixCap { bytes(&self) -> &[u8]552 fn bytes(&self) -> &[u8] { 553 self.as_slice() 554 } 555 id(&self) -> PciCapabilityID556 fn id(&self) -> PciCapabilityID { 557 PciCapabilityID::MSIX 558 } 559 } 560 561 impl MsixCap { new( table_pci_bar: u8, table_size: u16, table_off: u32, pba_pci_bar: u8, pba_off: u32, ) -> Self562 pub fn new( 563 table_pci_bar: u8, 564 table_size: u16, 565 table_off: u32, 566 pba_pci_bar: u8, 567 pba_off: u32, 568 ) -> Self { 569 assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE); 570 571 // Set the table size and enable MSI-X. 572 let msg_ctl: u16 = MSIX_ENABLE_BIT + table_size - 1; 573 574 MsixCap { 575 _cap_vndr: 0, 576 _cap_next: 0, 577 msg_ctl, 578 table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8), 579 pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8), 580 } 581 } 582 } 583