• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::cmp;
6 use std::fmt::{self, Display};
7 use std::mem;
8 use std::net::Ipv4Addr;
9 use std::os::unix::io::{AsRawFd, RawFd};
10 use std::sync::atomic::{AtomicUsize, Ordering};
11 use std::sync::Arc;
12 use std::thread;
13 
14 use libc::EAGAIN;
15 use net_sys;
16 use net_util::{Error as TapError, MacAddress, TapT};
17 use sys_util::Error as SysError;
18 use sys_util::{error, warn, EventFd, GuestMemory, PollContext, PollToken};
19 use virtio_sys::virtio_net::virtio_net_hdr_v1;
20 use virtio_sys::{vhost, virtio_net};
21 
22 use super::{Queue, VirtioDevice, INTERRUPT_STATUS_USED_RING, TYPE_NET};
23 
/// The maximum buffer size when segmentation offload is enabled. This
/// includes the 12-byte virtio net header.
/// http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html#x1-1740003
const MAX_BUFFER_SIZE: usize = 65562;
/// Number of descriptors in each virtqueue.
const QUEUE_SIZE: u16 = 256;
/// Sizes for the two queues this device exposes: rx first, then tx.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE, QUEUE_SIZE];
30 
/// Errors that can occur while creating, configuring, or running the
/// virtio net device.
#[derive(Debug)]
pub enum NetError {
    /// Creating kill eventfd failed.
    CreateKillEventFd(SysError),
    /// Creating PollContext failed.
    CreatePollContext(SysError),
    /// Cloning kill eventfd failed.
    CloneKillEventFd(SysError),
    /// Open tap device failed.
    TapOpen(TapError),
    /// Setting tap IP failed.
    TapSetIp(TapError),
    /// Setting tap netmask failed.
    TapSetNetmask(TapError),
    /// Setting tap mac address failed.
    TapSetMacAddress(TapError),
    /// Setting tap interface offload flags failed.
    TapSetOffload(TapError),
    /// Setting vnet header size failed.
    TapSetVnetHdrSize(TapError),
    /// Enabling tap interface failed.
    TapEnable(TapError),
    /// Validating tap interface failed.
    TapValidate(String),
    /// Error while polling for events.
    PollError(SysError),
}
58 
59 impl Display for NetError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result60     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
61         use self::NetError::*;
62 
63         match self {
64             CreateKillEventFd(e) => write!(f, "failed to create kill eventfd: {}", e),
65             CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
66             CloneKillEventFd(e) => write!(f, "failed to clone kill eventfd: {}", e),
67             TapOpen(e) => write!(f, "failed to open tap device: {}", e),
68             TapSetIp(e) => write!(f, "failed to set tap IP: {}", e),
69             TapSetNetmask(e) => write!(f, "failed to set tap netmask: {}", e),
70             TapSetMacAddress(e) => write!(f, "failed to set tap mac address: {}", e),
71             TapSetOffload(e) => write!(f, "failed to set tap interface offload flags: {}", e),
72             TapSetVnetHdrSize(e) => write!(f, "failed to set vnet header size: {}", e),
73             TapEnable(e) => write!(f, "failed to enable tap interface: {}", e),
74             TapValidate(s) => write!(f, "failed to validate tap interface: {}", s),
75             PollError(e) => write!(f, "error while polling for events: {}", e),
76         }
77     }
78 }
79 
/// Per-device worker state. One Worker runs on its own thread and services
/// the rx/tx virtqueues against the tap device.
struct Worker<T: TapT> {
    // Guest memory used to resolve descriptor addresses.
    mem: GuestMemory,
    // Receive (host -> guest) virtqueue.
    rx_queue: Queue,
    // Transmit (guest -> host) virtqueue.
    tx_queue: Queue,
    // Host tap device that frames are exchanged with.
    tap: T,
    // Shared interrupt status bits; INTERRUPT_STATUS_USED_RING is OR'd in
    // before signaling interrupt_evt.
    interrupt_status: Arc<AtomicUsize>,
    // Written to inject an interrupt into the guest.
    interrupt_evt: EventFd,
    // Polled in run(); when readable, the interrupt is re-asserted if any
    // status bits are still set.
    interrupt_resample_evt: EventFd,
    // Staging buffer for one received frame (including the virtio net header).
    rx_buf: [u8; MAX_BUFFER_SIZE],
    // Number of valid bytes currently held in rx_buf.
    rx_count: usize,
    // True when a frame in rx_buf is waiting for the guest to supply an rx
    // buffer; see rx_single_frame()/process_rx().
    deferred_rx: bool,
    // TODO(smbarber): http://crbug.com/753630
    // Remove once MRG_RXBUF is supported and this variable is actually used.
    #[allow(dead_code)]
    acked_features: u64,
}
96 
97 impl<T> Worker<T>
98 where
99     T: TapT,
100 {
signal_used_queue(&self)101     fn signal_used_queue(&self) {
102         self.interrupt_status
103             .fetch_or(INTERRUPT_STATUS_USED_RING as usize, Ordering::SeqCst);
104         self.interrupt_evt.write(1).unwrap();
105     }
106 
    // Copies a single frame from `self.rx_buf` into the guest. Returns true
    // if a buffer was used, and false if the frame must be deferred until a buffer
    // is made available by the driver.
    fn rx_single_frame(&mut self) -> bool {
        let mut next_desc = self.rx_queue.pop(&self.mem);

        // No available rx descriptor: the caller must keep the frame in
        // rx_buf and retry later (deferred_rx handling).
        if next_desc.is_none() {
            return false;
        }

        // We just checked that the head descriptor exists.
        let head_index = next_desc.as_ref().unwrap().index;
        let mut write_count = 0;

        // Copy from frame into buffer, which may span multiple descriptors.
        loop {
            match next_desc {
                Some(desc) => {
                    // Only device-writable descriptors can receive data; a
                    // read-only descriptor ends the usable chain.
                    if !desc.is_write_only() {
                        break;
                    }
                    // Clamp to the frame length so we never copy past the
                    // valid bytes in rx_buf.
                    let limit = cmp::min(write_count + desc.len as usize, self.rx_count);
                    let source_slice = &self.rx_buf[write_count..limit];
                    let write_result = self.mem.write_at_addr(source_slice, desc.addr);

                    match write_result {
                        Ok(sz) => {
                            write_count += sz;
                        }
                        Err(e) => {
                            warn!("net: rx: failed to write slice: {}", e);
                            break;
                        }
                    };

                    if write_count >= self.rx_count {
                        break;
                    }
                    next_desc = desc.next_descriptor();
                }
                None => {
                    // Chain exhausted before the whole frame was copied; the
                    // partial frame is still reported as used below.
                    warn!(
                        "net: rx: buffer is too small to hold frame of size {}",
                        self.rx_count
                    );
                    break;
                }
            }
        }

        self.rx_queue
            .add_used(&self.mem, head_index, write_count as u32);

        // Interrupt the guest immediately for received frames to
        // reduce latency.
        self.signal_used_queue();

        true
    }
166 
process_rx(&mut self)167     fn process_rx(&mut self) {
168         // Read as many frames as possible.
169         loop {
170             let res = self.tap.read(&mut self.rx_buf);
171             match res {
172                 Ok(count) => {
173                     self.rx_count = count;
174                     if !self.rx_single_frame() {
175                         self.deferred_rx = true;
176                         break;
177                     }
178                 }
179                 Err(e) => {
180                     // The tap device is nonblocking, so any error aside from EAGAIN is
181                     // unexpected.
182                     if e.raw_os_error().unwrap() != EAGAIN {
183                         warn!("net: rx: failed to read tap: {}", e);
184                     }
185                     break;
186                 }
187             }
188         }
189     }
190 
process_tx(&mut self)191     fn process_tx(&mut self) {
192         let mut frame = [0u8; MAX_BUFFER_SIZE];
193 
194         while let Some(avail_desc) = self.tx_queue.pop(&self.mem) {
195             let head_index = avail_desc.index;
196             let mut next_desc = Some(avail_desc);
197             let mut read_count = 0;
198 
199             // Copy buffer from across multiple descriptors.
200             while let Some(desc) = next_desc {
201                 if desc.is_write_only() {
202                     break;
203                 }
204                 let limit = cmp::min(read_count + desc.len as usize, frame.len());
205                 let read_result = self
206                     .mem
207                     .read_at_addr(&mut frame[read_count..limit as usize], desc.addr);
208                 match read_result {
209                     Ok(sz) => {
210                         read_count += sz;
211                     }
212                     Err(e) => {
213                         warn!("net: tx: failed to read slice: {}", e);
214                         break;
215                     }
216                 }
217                 next_desc = desc.next_descriptor();
218             }
219 
220             let write_result = self.tap.write(&frame[..read_count as usize]);
221             match write_result {
222                 Ok(_) => {}
223                 Err(e) => {
224                     warn!("net: tx: error failed to write to tap: {}", e);
225                 }
226             };
227 
228             self.tx_queue.add_used(&self.mem, head_index, 0);
229         }
230 
231         self.signal_used_queue();
232     }
233 
    /// Worker event loop. Polls the tap device, the rx/tx queue eventfds, the
    /// interrupt resample eventfd, and the kill eventfd, dispatching rx/tx
    /// processing until the kill eventfd fires or a queue eventfd read fails.
    fn run(
        &mut self,
        rx_queue_evt: EventFd,
        tx_queue_evt: EventFd,
        kill_evt: EventFd,
    ) -> Result<(), NetError> {
        #[derive(PollToken)]
        enum Token {
            // A frame is available for reading from the tap device to receive in the guest.
            RxTap,
            // The guest has made a buffer available to receive a frame into.
            RxQueue,
            // The transmit queue has a frame that is ready to send from the guest.
            TxQueue,
            // Check if any interrupts need to be re-asserted.
            InterruptResample,
            // crosvm has requested the device to shut down.
            Kill,
        }

        // Register all five event sources; any registration failure aborts
        // the worker with CreatePollContext.
        let poll_ctx: PollContext<Token> = PollContext::new()
            .and_then(|pc| pc.add(&self.tap, Token::RxTap).and(Ok(pc)))
            .and_then(|pc| pc.add(&rx_queue_evt, Token::RxQueue).and(Ok(pc)))
            .and_then(|pc| pc.add(&tx_queue_evt, Token::TxQueue).and(Ok(pc)))
            .and_then(|pc| {
                pc.add(&self.interrupt_resample_evt, Token::InterruptResample)
                    .and(Ok(pc))
            })
            .and_then(|pc| pc.add(&kill_evt, Token::Kill).and(Ok(pc)))
            .map_err(NetError::CreatePollContext)?;

        'poll: loop {
            let events = poll_ctx.wait().map_err(NetError::PollError)?;
            for event in events.iter_readable() {
                match event.token() {
                    Token::RxTap => {
                        // Process a deferred frame first if available. Don't read from tap again
                        // until we manage to receive this deferred frame.
                        if self.deferred_rx {
                            if self.rx_single_frame() {
                                self.deferred_rx = false;
                            } else {
                                continue;
                            }
                        }
                        self.process_rx();
                    }
                    Token::RxQueue => {
                        if let Err(e) = rx_queue_evt.read() {
                            error!("net: error reading rx queue EventFd: {}", e);
                            break 'poll;
                        }
                        // There should be a buffer available now to receive the frame into.
                        if self.deferred_rx && self.rx_single_frame() {
                            self.deferred_rx = false;
                        }
                    }
                    Token::TxQueue => {
                        if let Err(e) = tx_queue_evt.read() {
                            error!("net: error reading tx queue EventFd: {}", e);
                            break 'poll;
                        }
                        self.process_tx();
                    }
                    Token::InterruptResample => {
                        // Clear the resample event, then re-assert the
                        // interrupt if any status bits remain set.
                        let _ = self.interrupt_resample_evt.read();
                        if self.interrupt_status.load(Ordering::SeqCst) != 0 {
                            self.interrupt_evt.write(1).unwrap();
                        }
                    }
                    Token::Kill => break 'poll,
                }
            }
        }
        Ok(())
    }
310 }
311 
/// Virtio network device backed by a tap interface.
pub struct Net<T: TapT> {
    // Clone of kill_evt handed to the worker thread; None once activate()
    // has moved it into the worker.
    workers_kill_evt: Option<EventFd>,
    // Written on drop to tell a running worker thread to exit.
    kill_evt: EventFd,
    // Tap device; taken by the worker thread in activate().
    tap: Option<T>,
    // Feature bits this device offers to the guest.
    avail_features: u64,
    // Feature bits the guest has acknowledged so far.
    acked_features: u64,
}
319 
320 impl<T> Net<T>
321 where
322     T: TapT,
323 {
    /// Create a new virtio network device with the given IP address and
    /// netmask.
    ///
    /// Opens a fresh tap device, assigns it `ip_addr`, `netmask`, and
    /// `mac_addr`, brings the interface up, then finishes construction via
    /// `Net::from` (which validates and configures offloads).
    pub fn new(
        ip_addr: Ipv4Addr,
        netmask: Ipv4Addr,
        mac_addr: MacAddress,
    ) -> Result<Net<T>, NetError> {
        let tap: T = T::new(true).map_err(NetError::TapOpen)?;
        tap.set_ip_addr(ip_addr).map_err(NetError::TapSetIp)?;
        tap.set_netmask(netmask).map_err(NetError::TapSetNetmask)?;
        tap.set_mac_address(mac_addr)
            .map_err(NetError::TapSetMacAddress)?;

        tap.enable().map_err(NetError::TapEnable)?;

        Net::from(tap)
    }
341 
342     /// Creates a new virtio network device from a tap device that has already been
343     /// configured.
from(tap: T) -> Result<Net<T>, NetError>344     pub fn from(tap: T) -> Result<Net<T>, NetError> {
345         // This would also validate a tap created by Self::new(), but that's a good thing as it
346         // would ensure that any changes in the creation procedure are matched in the validation.
347         // Plus we still need to set the offload and vnet_hdr_size values.
348         validate_and_configure_tap(&tap)?;
349 
350         let avail_features = 1 << virtio_net::VIRTIO_NET_F_GUEST_CSUM
351             | 1 << virtio_net::VIRTIO_NET_F_CSUM
352             | 1 << virtio_net::VIRTIO_NET_F_GUEST_TSO4
353             | 1 << virtio_net::VIRTIO_NET_F_GUEST_UFO
354             | 1 << virtio_net::VIRTIO_NET_F_HOST_TSO4
355             | 1 << virtio_net::VIRTIO_NET_F_HOST_UFO
356             | 1 << vhost::VIRTIO_F_VERSION_1;
357 
358         let kill_evt = EventFd::new().map_err(NetError::CreateKillEventFd)?;
359         Ok(Net {
360             workers_kill_evt: Some(kill_evt.try_clone().map_err(NetError::CloneKillEventFd)?),
361             kill_evt,
362             tap: Some(tap),
363             avail_features,
364             acked_features: 0u64,
365         })
366     }
367 }
368 
369 // Ensure that the tap interface has the correct flags and sets the offload and VNET header size
370 // to the appropriate values.
validate_and_configure_tap<T: TapT>(tap: &T) -> Result<(), NetError>371 fn validate_and_configure_tap<T: TapT>(tap: &T) -> Result<(), NetError> {
372     let flags = tap.if_flags();
373     let required_flags = [
374         (net_sys::IFF_TAP, "IFF_TAP"),
375         (net_sys::IFF_NO_PI, "IFF_NO_PI"),
376         (net_sys::IFF_VNET_HDR, "IFF_VNET_HDR"),
377     ];
378     let missing_flags = required_flags
379         .iter()
380         .filter_map(
381             |(value, name)| {
382                 if value & flags == 0 {
383                     Some(name)
384                 } else {
385                     None
386                 }
387             },
388         )
389         .collect::<Vec<_>>();
390 
391     if !missing_flags.is_empty() {
392         return Err(NetError::TapValidate(format!(
393             "Missing flags: {:?}",
394             missing_flags
395         )));
396     }
397 
398     // Set offload flags to match the virtio features below.
399     tap.set_offload(
400         net_sys::TUN_F_CSUM | net_sys::TUN_F_UFO | net_sys::TUN_F_TSO4 | net_sys::TUN_F_TSO6,
401     )
402     .map_err(NetError::TapSetOffload)?;
403 
404     let vnet_hdr_size = mem::size_of::<virtio_net_hdr_v1>() as i32;
405     tap.set_vnet_hdr_size(vnet_hdr_size)
406         .map_err(NetError::TapSetVnetHdrSize)?;
407 
408     Ok(())
409 }
410 
411 impl<T> Drop for Net<T>
412 where
413     T: TapT,
414 {
drop(&mut self)415     fn drop(&mut self) {
416         // Only kill the child if it claimed its eventfd.
417         if self.workers_kill_evt.is_none() {
418             // Ignore the result because there is nothing we can do about it.
419             let _ = self.kill_evt.write(1);
420         }
421     }
422 }
423 
424 impl<T> VirtioDevice for Net<T>
425 where
426     T: 'static + TapT,
427 {
keep_fds(&self) -> Vec<RawFd>428     fn keep_fds(&self) -> Vec<RawFd> {
429         let mut keep_fds = Vec::new();
430 
431         if let Some(tap) = &self.tap {
432             keep_fds.push(tap.as_raw_fd());
433         }
434 
435         if let Some(workers_kill_evt) = &self.workers_kill_evt {
436             keep_fds.push(workers_kill_evt.as_raw_fd());
437         }
438 
439         keep_fds
440     }
441 
    /// Returns the virtio device type identifier for a network device.
    fn device_type(&self) -> u32 {
        TYPE_NET
    }
445 
    /// Returns the maximum size of each queue: one rx and one tx queue.
    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }
449 
    /// Returns the feature bits this device offers to the guest.
    fn features(&self) -> u64 {
        self.avail_features
    }
453 
ack_features(&mut self, value: u64)454     fn ack_features(&mut self, value: u64) {
455         let mut v = value;
456 
457         // Check if the guest is ACK'ing a feature that we didn't claim to have.
458         let unrequested_features = v & !self.avail_features;
459         if unrequested_features != 0 {
460             warn!("net: virtio net got unknown feature ack: {:x}", v);
461 
462             // Don't count these features as acked.
463             v &= !unrequested_features;
464         }
465         self.acked_features |= v;
466     }
467 
    /// Starts the device: spawns the "virtio_net" worker thread that services
    /// the rx and tx queues. Expects exactly two queues (rx first, then tx)
    /// and two matching queue eventfds; failures are logged and activation is
    /// silently abandoned (the trait method cannot return an error).
    fn activate(
        &mut self,
        mem: GuestMemory,
        interrupt_evt: EventFd,
        interrupt_resample_evt: EventFd,
        status: Arc<AtomicUsize>,
        mut queues: Vec<Queue>,
        mut queue_evts: Vec<EventFd>,
    ) {
        if queues.len() != 2 || queue_evts.len() != 2 {
            error!("net: expected 2 queues, got {}", queues.len());
            return;
        }

        // take() both the tap and the worker kill eventfd so they can be
        // moved into the worker thread; this also makes activate a one-shot —
        // a second call finds None and does nothing.
        if let Some(tap) = self.tap.take() {
            if let Some(kill_evt) = self.workers_kill_evt.take() {
                let acked_features = self.acked_features;
                let worker_result =
                    thread::Builder::new()
                        .name("virtio_net".to_string())
                        .spawn(move || {
                            // First queue is rx, second is tx.
                            let rx_queue = queues.remove(0);
                            let tx_queue = queues.remove(0);
                            let mut worker = Worker {
                                mem,
                                rx_queue,
                                tx_queue,
                                tap,
                                interrupt_status: status,
                                interrupt_evt,
                                interrupt_resample_evt,
                                rx_buf: [0u8; MAX_BUFFER_SIZE],
                                rx_count: 0,
                                deferred_rx: false,
                                acked_features,
                            };
                            let rx_queue_evt = queue_evts.remove(0);
                            let tx_queue_evt = queue_evts.remove(0);
                            let result = worker.run(rx_queue_evt, tx_queue_evt, kill_evt);
                            if let Err(e) = result {
                                error!("net worker thread exited with error: {}", e);
                            }
                        });

                if let Err(e) = worker_result {
                    error!("failed to spawn virtio_net worker: {}", e);
                    return;
                }
            }
        }
    }
520 }
521