• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::HashMap;
6 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
7 use std::fs::File;
8 use std::io;
9 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
10 use std::io::BufWriter;
11 use std::net::Ipv4Addr;
12 use std::net::Ipv6Addr;
13 use std::time::Duration;
14 use std::time::Instant;
15 
16 use base::error;
17 use base::named_pipes::OverlappedWrapper;
18 use base::named_pipes::PipeConnection;
19 use base::warn;
20 use base::AsRawDescriptor;
21 use base::Descriptor;
22 use base::Error as SysError;
23 use base::Event;
24 use base::EventExt;
25 use base::EventToken;
26 use base::RawDescriptor;
27 use base::Timer;
28 use base::WaitContext;
29 use base::WaitContextExt;
30 use metrics::MetricEventType;
31 use metrics::PeriodicLogger;
32 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
33 use pcap_file::pcap::PcapWriter;
34 use smallvec::SmallVec;
35 use virtio_sys::virtio_net::virtio_net_hdr;
36 use virtio_sys::virtio_net::virtio_net_hdr_mrg_rxbuf;
37 use winapi::shared::minwindef::MAKEWORD;
38 use winapi::um::winnt::LONG;
39 use winapi::um::winnt::SHORT;
40 use winapi::um::winsock2::WSACleanup;
41 use winapi::um::winsock2::WSAEventSelect;
42 use winapi::um::winsock2::WSAGetLastError;
43 use winapi::um::winsock2::WSAPoll;
44 use winapi::um::winsock2::WSAStartup;
45 use winapi::um::winsock2::FD_CLOSE;
46 use winapi::um::winsock2::FD_READ;
47 use winapi::um::winsock2::FD_WRITE;
48 use winapi::um::winsock2::POLLERR;
49 use winapi::um::winsock2::POLLHUP;
50 use winapi::um::winsock2::POLLRDBAND;
51 use winapi::um::winsock2::POLLRDNORM;
52 use winapi::um::winsock2::POLLWRNORM;
53 use winapi::um::winsock2::SOCKET;
54 use winapi::um::winsock2::SOCKET_ERROR;
55 use winapi::um::winsock2::WSADATA;
56 use winapi::um::winsock2::WSAPOLLFD;
57 use zerocopy::AsBytes;
58 
59 use crate::slirp::context::CallbackHandler;
60 use crate::slirp::context::Context;
61 use crate::slirp::context::PollEvents;
62 #[cfg(feature = "slirp-ring-capture")]
63 use crate::slirp::packet_ring_buffer::PacketRingBuffer;
64 use crate::slirp::SlirpError;
65 use crate::slirp::ETHERNET_FRAME_SIZE;
66 use crate::Error;
67 use crate::Result;
68 
/// File name used for the packet capture when it is created in the current directory.
#[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
const SLIRP_CAPTURE_FILE_NAME: &str = "slirp_capture.pcap";

/// Capacity of the `BufWriter` placed in front of the capture file (1 MiB).
#[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
const PCAP_FILE_BUFFER_SIZE: usize = 1 << 20;

/// Number of leading bytes skipped on every frame read from the guest before the ethernet
/// payload starts.
const VETH_HEADER_LENGTH: usize = 12;

/// Size, in bytes, given to each packet ring buffer (30 MB).
#[cfg(feature = "slirp-ring-capture")]
const PACKET_RING_BUFFER_SIZE_IN_BYTES: usize = 30_000_000;
79 
80 struct Handler {
81     start: Instant,
82     pipe: PipeConnection,
83     read_overlapped_wrapper: OverlappedWrapper,
84     buf: [u8; ETHERNET_FRAME_SIZE],
85     write_overlapped_wrapper: OverlappedWrapper,
86     // Stores the actual timer (Event) and callback. Note that Event ownership is held by libslirp,
87     // and created/released via `timer_new` and `timer_free`.
88     timer_callbacks: HashMap<RawDescriptor, Box<dyn FnMut()>>,
89     #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
90     pcap_writer: PcapWriter<BufWriter<File>>,
91     #[cfg(feature = "slirp-ring-capture")]
92     tx_packet_ring_buffer: PacketRingBuffer,
93     #[cfg(feature = "slirp-ring-capture")]
94     rx_packet_ring_buffer: PacketRingBuffer,
95     tx_logger: PeriodicLogger,
96     rx_logger: PeriodicLogger,
97 }
98 
99 impl CallbackHandler for Handler {
100     type Timer = base::Timer;
101 
clock_get_ns(&mut self) -> i64102     fn clock_get_ns(&mut self) -> i64 {
103         const NANOS_PER_SEC: u64 = 1_000_000_000;
104         let running_duration = self.start.elapsed();
105         (running_duration.as_secs() * NANOS_PER_SEC + running_duration.subsec_nanos() as u64) as i64
106     }
107 
108     /// Sends a packet to the guest.
send_packet(&mut self, buf: &[u8]) -> io::Result<usize>109     fn send_packet(&mut self, buf: &[u8]) -> io::Result<usize> {
110         let vnet_hdr = virtio_net_hdr_mrg_rxbuf {
111             hdr: virtio_net_hdr {
112                 flags: 0,
113                 gso_size: 0,
114                 hdr_len: 0,
115                 csum_start: 0,
116                 csum_offset: 0,
117                 gso_type: virtio_sys::virtio_net::VIRTIO_NET_HDR_GSO_NONE as u8,
118             },
119             num_buffers: 1,
120         };
121         let send_buf = [vnet_hdr.as_bytes(), buf].concat();
122 
123         #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
124         let d = self.start.elapsed();
125         #[cfg(feature = "slirp-debug")]
126         {
127             self.pcap_writer
128                 .write(d.as_secs() as u32, d.subsec_nanos(), buf, buf.len() as u32)
129                 .unwrap();
130         }
131         #[cfg(feature = "slirp-ring-capture")]
132         {
133             self.tx_packet_ring_buffer
134                 .add_packet(buf, d)
135                 .expect("Failed to add packet.");
136         }
137         // Log as rx from the guest's perspective
138         self.rx_logger.log(buf.len() as i64);
139         // SAFETY: safe because the operation ends with send_buf and
140         // write_overlapped_wrapper still in scope.
141         unsafe {
142             self.pipe
143                 .write_overlapped(&send_buf, &mut self.write_overlapped_wrapper)?;
144         }
145         self.pipe
146             .get_overlapped_result(&mut self.write_overlapped_wrapper)
147             .map(|x| x as usize)
148     }
149 
150     // Not required per https://github.com/rootless-containers/slirp4netns/blob/7f6a4a654a84d4356c881a10417bab77fd5be325/slirp4netns.c
register_poll_fd(&mut self, _fd: i32)151     fn register_poll_fd(&mut self, _fd: i32) {}
unregister_poll_fd(&mut self, _fd: i32)152     fn unregister_poll_fd(&mut self, _fd: i32) {}
153 
guest_error(&mut self, msg: &str)154     fn guest_error(&mut self, msg: &str) {
155         warn!("guest error: {}", msg);
156     }
157 
158     // Not required per https://github.com/rootless-containers/slirp4netns/blob/7f6a4a654a84d4356c881a10417bab77fd5be325/slirp4netns.c
notify(&mut self)159     fn notify(&mut self) {}
160 
timer_new(&mut self, callback: Box<dyn FnMut()>) -> Box<Self::Timer>161     fn timer_new(&mut self, callback: Box<dyn FnMut()>) -> Box<Self::Timer> {
162         let timer = Timer::new().expect("failed to create network timer");
163         self.timer_callbacks
164             .insert(timer.as_raw_descriptor(), callback);
165         Box::new(timer)
166     }
167 
timer_mod(&mut self, timer: &mut Self::Timer, expire_time: i64)168     fn timer_mod(&mut self, timer: &mut Self::Timer, expire_time: i64) {
169         // expire_time is a clock_get_ns relative deadline.
170         let timer_duration = Duration::from_millis(expire_time as u64)
171             - Duration::from_nanos(self.clock_get_ns() as u64);
172 
173         timer
174             .reset(timer_duration, None)
175             .expect("failed to modify network timer");
176     }
177 
timer_free(&mut self, timer: Box<Self::Timer>)178     fn timer_free(&mut self, timer: Box<Self::Timer>) {
179         self.timer_callbacks.remove(&timer.as_raw_descriptor());
180         // The actual Timer is freed implicitly by the Box drop.
181     }
182 
get_timers<'a>(&'a self) -> Box<dyn Iterator<Item = &RawDescriptor> + 'a>183     fn get_timers<'a>(&'a self) -> Box<dyn Iterator<Item = &RawDescriptor> + 'a> {
184         Box::new(self.timer_callbacks.keys())
185     }
186 
execute_timer(&mut self, timer: RawDescriptor)187     fn execute_timer(&mut self, timer: RawDescriptor) {
188         let timer_callback = self
189             .timer_callbacks
190             .get_mut(&timer)
191             .expect("tried to run timer that has no callback");
192         timer_callback()
193     }
194 
begin_read_from_guest(&mut self) -> io::Result<()>195     fn begin_read_from_guest(&mut self) -> io::Result<()> {
196         // Safe because we are writing simple bytes.
197         unsafe {
198             self.pipe
199                 .read_overlapped(&mut self.buf, &mut self.read_overlapped_wrapper)
200         }
201     }
202 
end_read_from_guest(&mut self) -> io::Result<&[u8]>203     fn end_read_from_guest(&mut self) -> io::Result<&[u8]> {
204         #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
205         let d = self.start.elapsed();
206         match self
207             .pipe
208             .try_get_overlapped_result(&mut self.read_overlapped_wrapper)
209         {
210             Ok(len) if len as usize >= VETH_HEADER_LENGTH => {
211                 // Skip over the veth header (12 bytes, created by the frontend per the
212                 // virtio spec).
213                 let ethernet_pkt = &self.buf[VETH_HEADER_LENGTH..len as usize];
214 
215                 #[cfg(feature = "slirp-debug")]
216                 {
217                     self.pcap_writer
218                         .write(
219                             d.as_secs() as u32,
220                             d.subsec_nanos(),
221                             ethernet_pkt,
222                             (len - VETH_HEADER_LENGTH) as u32,
223                         )
224                         .unwrap();
225                 }
226                 #[cfg(feature = "slirp-ring-capture")]
227                 {
228                     self.rx_packet_ring_buffer
229                         .add_packet(ethernet_pkt, d)
230                         .expect("Failed to add packet.");
231                 }
232                 // Log as tx from the guest's perspective
233                 self.tx_logger.log(len as i64);
234                 Ok(ethernet_pkt)
235             }
236             Ok(len) => Err(io::Error::new(
237                 io::ErrorKind::InvalidData,
238                 format!(
239                     "Too few bytes ({}) read from the guest's virtio-net frontend.",
240                     len
241                 ),
242             )),
243             Err(e) => Err(e),
244         }
245     }
246 }
247 
/// Flushes the captured rx/tx ring buffers into the pcap file when the handler goes away.
#[cfg(feature = "slirp-ring-capture")]
impl Drop for Handler {
    fn drop(&mut self) {
        let packets = PacketRingBuffer::pop_ring_buffers_and_aggregate(
            &mut self.rx_packet_ring_buffer,
            &mut self.tx_packet_ring_buffer,
        );

        for packet in packets {
            let write_result = self.pcap_writer.write(
                packet.timestamp.as_secs() as u32,
                packet.timestamp.subsec_nanos(),
                &packet.buf,
                packet.buf.len() as u32,
            );
            // Panicking inside `drop` aborts the process if we are already unwinding, so report
            // the failure and give up on the remaining packets instead of unwrapping.
            if let Err(e) = write_result {
                error!("failed to write captured packet to pcap file: {:?}", e);
                break;
            }
        }
    }
}
268 
last_wsa_error() -> io::Error269 fn last_wsa_error() -> io::Error {
270     io::Error::from_raw_os_error(unsafe { WSAGetLastError() })
271 }
272 
poll_sockets(mut sockets: Vec<WSAPOLLFD>) -> io::Result<Vec<WSAPOLLFD>>273 fn poll_sockets(mut sockets: Vec<WSAPOLLFD>) -> io::Result<Vec<WSAPOLLFD>> {
274     // Safe because sockets is guaranteed to be valid, and we handle error return codes below.
275     let poll_result = unsafe {
276         WSAPoll(
277             sockets.as_mut_ptr() as *mut WSAPOLLFD,
278             sockets.len() as u32,
279             1, /* timeout in ms */
280         )
281     };
282 
283     match poll_result {
284         SOCKET_ERROR => Err(last_wsa_error()),
285         _ => Ok(sockets),
286     }
287 }
288 
289 /// Converts WSA poll events into the network event bitfield used by WSAEventSelect.
wsa_events_to_wsa_network_events(events: SHORT) -> LONG290 fn wsa_events_to_wsa_network_events(events: SHORT) -> LONG {
291     let mut net_events = 0;
292     if events & (POLLRDNORM | POLLRDBAND) != 0 {
293         net_events |= FD_READ;
294     }
295     if events & POLLWRNORM > 0 {
296         net_events |= FD_WRITE;
297     }
298     net_events
299 }
300 
wsa_events_to_slirp_events(events: SHORT) -> PollEvents301 fn wsa_events_to_slirp_events(events: SHORT) -> PollEvents {
302     // On Windows, revents have the following meaning:
303     // Linux POLLIN == POLLRDBAND | POLLRDNORM
304     // Linux POLLOUT == POLLWRNORM
305     // Linux POLLERR == POLLERR
306     // Windows: POLLPRI is not implemented.
307     // POLLNVAL is not a supported Slirp polling flag.
308     // Further details at
309     //      https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsapoll
310     let mut poll_events = PollEvents::empty();
311     if events & (POLLRDNORM | POLLRDBAND) != 0 {
312         poll_events |= PollEvents::poll_in();
313     }
314     if events & POLLWRNORM != 0 {
315         poll_events |= PollEvents::poll_out();
316     }
317     if events & POLLERR != 0 {
318         poll_events |= PollEvents::poll_err();
319     }
320     if events & POLLHUP != 0 {
321         poll_events |= PollEvents::poll_hup();
322     }
323     poll_events
324 }
325 
slirp_events_to_wsa_events(events: PollEvents) -> SHORT326 fn slirp_events_to_wsa_events(events: PollEvents) -> SHORT {
327     // Note that the events that get sent into WSAPoll are a subset of the events that are returned
328     // by WSAPoll. As such, this function is not an inverse of wsa_events_to_slirp_events.
329     let mut wsa_events: SHORT = 0;
330     if events.has_in() {
331         wsa_events |= POLLRDNORM | POLLRDBAND;
332     }
333     if events.has_out() {
334         wsa_events |= POLLWRNORM;
335     }
336     // NOTE: POLLHUP cannot be supplied to WSAPoll.
337 
338     wsa_events
339 }
340 
341 #[derive(EventToken, Eq, PartialEq, Copy, Clone)]
342 enum Token {
343     EventHandleReady(usize),
344     SocketReady,
345 }
346 
347 /// Associates a WSAPOLLFD's events with an Event object, disassociating on drop.
348 struct EventSelectedSocket<'a> {
349     socket: WSAPOLLFD,
350     event: &'a Event,
351 }
352 
353 impl<'a> EventSelectedSocket<'a> {
new(socket: WSAPOLLFD, event: &'a Event) -> Result<EventSelectedSocket>354     fn new(socket: WSAPOLLFD, event: &'a Event) -> Result<EventSelectedSocket> {
355         // Safe because socket.fd exists, the event handle is guaranteed to exist, and we check the
356         // return code below.
357         let res = unsafe {
358             WSAEventSelect(
359                 socket.fd as SOCKET,
360                 event.as_raw_descriptor(),
361                 // Because WSAPOLLFD cannot contain POLLHUP (even if libslirp wanted to specify it,
362                 // WSAPoll does not accept it), we assume it is always present.
363                 wsa_events_to_wsa_network_events(socket.events) | FD_CLOSE,
364             )
365         };
366         if res == SOCKET_ERROR {
367             return Err(Error::Slirp(SlirpError::SlirpIOPollError(last_wsa_error())));
368         }
369         Ok(EventSelectedSocket { socket, event })
370     }
371 }
372 
373 impl<'a> Drop for EventSelectedSocket<'a> {
drop(&mut self)374     fn drop(&mut self) {
375         // Safe because socket.fd exists, the event handle is guaranteed to exist, and we check the
376         // return code below.
377         let res = unsafe {
378             WSAEventSelect(
379                 self.socket.fd as SOCKET,
380                 self.event.as_raw_descriptor(),
381                 /* listen for no events */ 0,
382             )
383         };
384         if res == SOCKET_ERROR {
385             warn!("failed to unselect socket: {}", last_wsa_error());
386         }
387     }
388 }
389 
390 /// Rough equivalent of select(...) for Windows.
391 /// The following behavior is guaranteed:
392 ///   1. The position of sockets in the sockets vector is maintained on return.
393 ///   2. Sockets are always polled on any wakeup.
394 ///
395 /// For optimization reasons, takes a utility event & WaitContext to avoid having to re-create
396 /// those objects if poll is called from an event loop. The Event and WaitContext MUST NOT be used
397 /// for any other purpose in between calls to `poll`.
poll<'a>( wait_ctx: &WaitContext<Token>, socket_event_handle: &Event, handles: Vec<&'a dyn AsRawDescriptor>, sockets: Vec<WSAPOLLFD>, timeout: Option<Duration>, ) -> Result<(Vec<&'a dyn AsRawDescriptor>, Vec<WSAPOLLFD>)>398 fn poll<'a>(
399     wait_ctx: &WaitContext<Token>,
400     socket_event_handle: &Event,
401     handles: Vec<&'a dyn AsRawDescriptor>,
402     sockets: Vec<WSAPOLLFD>,
403     timeout: Option<Duration>,
404 ) -> Result<(Vec<&'a dyn AsRawDescriptor>, Vec<WSAPOLLFD>)> {
405     let mut selected_sockets = Vec::with_capacity(sockets.len());
406     for socket in sockets.iter() {
407         selected_sockets.push(EventSelectedSocket::new(*socket, socket_event_handle)?);
408     }
409 
410     wait_ctx
411         .clear()
412         .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
413     for (i, handle) in handles.iter().enumerate() {
414         match wait_ctx.add(*handle, Token::EventHandleReady(i)) {
415             Ok(v) => v,
416             Err(e) => {
417                 return Err(Error::Slirp(SlirpError::SlirpPollError(e)));
418             }
419         }
420     }
421     match wait_ctx.add(socket_event_handle, Token::SocketReady) {
422         Ok(v) => v,
423         Err(e) => {
424             return Err(Error::Slirp(SlirpError::SlirpPollError(e)));
425         }
426     }
427 
428     let events = if let Some(timeout) = timeout {
429         wait_ctx
430             .wait_timeout(timeout)
431             .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?
432     } else {
433         wait_ctx
434             .wait()
435             .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?
436     };
437 
438     let tokens: Vec<Token> = events
439         .iter()
440         .filter(|e| e.is_readable)
441         .map(|e| e.token)
442         .collect();
443     let mut handle_results = Vec::new();
444     for token in tokens {
445         match token {
446             Token::EventHandleReady(i) => {
447                 handle_results.push(handles[i]);
448             }
449             Token::SocketReady => {
450                 // We always call poll_sockets, so whether the token is present doesn't matter.
451             }
452         };
453     }
454 
455     let socket_results = if sockets.is_empty() {
456         Vec::new()
457     } else {
458         poll_sockets(sockets).map_err(|e| Error::Slirp(SlirpError::SlirpIOPollError(e)))?
459     };
460 
461     Ok((handle_results, socket_results))
462 }
463 
464 /// Opens a WSAStartup/WSACleanup context; in other words, while a context is held, winsock calls
465 /// can be made.
466 struct WSAContext {
467     data: WSADATA,
468 }
469 
470 impl WSAContext {
new() -> Result<WSAContext>471     fn new() -> Result<WSAContext> {
472         // Trivially safe (initialization of this memory is not required).
473         let mut ctx: WSAContext = unsafe { std::mem::zeroed() };
474 
475         // Safe because ctx.data is guaranteed to exist, and we check the return code.
476         let err = unsafe { WSAStartup(MAKEWORD(2, 0), &mut ctx.data) };
477         if err != 0 {
478             Err(Error::Slirp(SlirpError::WSAStartupError(SysError::new(
479                 err,
480             ))))
481         } else {
482             Ok(ctx)
483         }
484     }
485 }
486 
487 impl Drop for WSAContext {
drop(&mut self)488     fn drop(&mut self) {
489         let err = unsafe { WSACleanup() };
490         if err != 0 {
491             error!("WSACleanup failed: {}", last_wsa_error())
492         }
493     }
494 }
495 
496 /// Starts libslirp's main loop attached to host_pipe. Packets are exchanged between host_pipe and
497 /// the host's network stack.
498 ///
499 /// host_pipe must be non blocking & in message mode.
start_slirp( host_pipe: PipeConnection, shutdown_event: Event, disable_access_to_host: bool, #[cfg(feature = "slirp-ring-capture")] slirp_capture_file: Option<String>, ) -> Result<()>500 pub fn start_slirp(
501     host_pipe: PipeConnection,
502     shutdown_event: Event,
503     disable_access_to_host: bool,
504     #[cfg(feature = "slirp-ring-capture")] slirp_capture_file: Option<String>,
505 ) -> Result<()> {
506     // This call is not strictly required because libslirp currently calls WSAStartup for us, but
507     // relying on that is brittle and a potential source of bugs as we have our own socket code that
508     // runs on the Rust side.
509     let _wsa_context = WSAContext::new()?;
510 
511     let (mut context, host_pipe_notifier_handle) = create_slirp_context(
512         host_pipe,
513         disable_access_to_host,
514         #[cfg(feature = "slirp-ring-capture")]
515         slirp_capture_file,
516     )?;
517     let shutdown_event_handle = shutdown_event.as_raw_descriptor();
518 
519     // Stack data for the poll function.
520     let wait_ctx: WaitContext<Token> =
521         WaitContext::new().map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
522     let socket_event_handle =
523         Event::new_auto_reset().map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
524 
525     'slirp: loop {
526         // Request the FDs that we should poll from Slirp. Slirp provides them to us by way of a
527         // callback, which is invoked for each FD. This callback requires us to assign each FD an index
528         // which will be used by a subsequent Slirp call to get the poll events for each FD. The data
529         // flow can be thought of as follows:
530         //    1. pollfds_fill creates a map of index -> fd inside Slirp based on the return values from
531         //       the pollfds_fill callback.
532         //    2. crosvm invokes poll on the FDs provided by Slirp.
533         //    3. crosvm notifies Slirp via pollfds_poll that polling completed for the provided FDs.
534         //    4. Slirp calls into crosvm via the pollfds_poll callback and asks for the statuses using
535         //       the fd indicies registered in step #1.
536         let mut poll_fds = Vec::new();
537         // We'd like to sleep as long as possible (assuming no actionable notifications arrive).
538         let mut timeout_ms: u32 = u32::MAX;
539         context.pollfds_fill(&mut timeout_ms, |fd: i32, events: PollEvents| {
540             poll_fds.push(WSAPOLLFD {
541                 fd: fd as usize,
542                 events: slirp_events_to_wsa_events(events),
543                 revents: 0,
544             });
545             (poll_fds.len() - 1) as i32
546         });
547 
548         // There are relatively few concurrent timer_callbacks used by libslirp, so we set the small vector
549         // size low.
550         let timer_callbacks = context
551             .get_timers()
552             .map(|timer| Descriptor(*timer))
553             .collect::<SmallVec<[Descriptor; 8]>>();
554         let mut handles: Vec<&dyn AsRawDescriptor> = Vec::with_capacity(timer_callbacks.len() + 2);
555         handles.extend(
556             timer_callbacks
557                 .iter()
558                 .map(|timer| timer as &dyn AsRawDescriptor),
559         );
560 
561         let host_pipe_notifier = Descriptor(host_pipe_notifier_handle);
562         handles.push(&host_pipe_notifier);
563         handles.push(&shutdown_event);
564 
565         let (handle_results, socket_results) = poll(
566             &wait_ctx,
567             &socket_event_handle,
568             handles,
569             poll_fds,
570             Some(Duration::from_millis(timeout_ms.into())),
571         )?;
572 
573         for handle in handle_results.iter() {
574             match handle.as_raw_descriptor() {
575                 h if h == host_pipe_notifier_handle => {
576                     // Collect input from the guest & inject into Slirp. It seems that this input
577                     // step should be between pollfds_fill & pollfds_poll.
578                     context.handle_guest_input()?;
579                 }
580                 h if h == shutdown_event_handle => {
581                     break 'slirp;
582                 }
583                 timer_handle => {
584                     // All other handles are timer_callbacks.
585                     context.execute_timer(timer_handle);
586                 }
587             }
588         }
589 
590         // It's possible no socket notified and we got here from a timeout. This is fine, because
591         // libslirp wants to be woken up if timeout has expired (even if no sockets are ready).
592         context.pollfds_poll(false, |fd_index: i32| {
593             wsa_events_to_slirp_events(socket_results[fd_index as usize].revents)
594         })
595     }
596 
597     // Never reached.
598     Ok(())
599 }
600 
/// Creates the slirp capture file.
///
/// The user provided path is tried first. If no path was given, or the file cannot be created
/// there, a warning is logged and the file is created in the current directory instead (named
/// `SLIRP_CAPTURE_FILE_NAME`). Panics if that fallback file cannot be created.
#[cfg(feature = "slirp-ring-capture")]
fn create_slirp_capture_file(slirp_capture_file: Option<String>) -> File {
    match slirp_capture_file {
        Some(requested_path) => match File::create(&requested_path) {
            Ok(file) => return file,
            Err(e) => {
                warn!(
                    "Unable to save slirp capture packets file to {}, \
                     Saving file to current directory. Error: {}",
                    requested_path, e
                );
            }
        },
        None => {
            warn!(
                "run parameter --slirp-capture-file not specified. Saving file to current directory."
            );
        }
    }

    // Both failure paths end up here.
    File::create(SLIRP_CAPTURE_FILE_NAME).unwrap()
}
627 
create_slirp_context( host_pipe: PipeConnection, disable_access_to_host: bool, #[cfg(feature = "slirp-ring-capture")] slirp_capture_file: Option<String>, ) -> Result<(Box<Context<Handler>>, RawDescriptor)>628 fn create_slirp_context(
629     host_pipe: PipeConnection,
630     disable_access_to_host: bool,
631     #[cfg(feature = "slirp-ring-capture")] slirp_capture_file: Option<String>,
632 ) -> Result<(Box<Context<Handler>>, RawDescriptor)> {
633     #[cfg(feature = "slirp-ring-capture")]
634     let slirp_captured_packets_file = create_slirp_capture_file(slirp_capture_file);
635     #[cfg(all(not(feature = "slirp-ring-capture"), feature = "slirp-debug"))]
636     let slirp_captured_packets_file = File::create(SLIRP_CAPTURE_FILE_NAME).unwrap();
637     let overlapped_wrapper = OverlappedWrapper::new(true).unwrap();
638     let read_notifier = overlapped_wrapper
639         .get_h_event_ref()
640         .unwrap()
641         .as_raw_descriptor();
642     let handler = Handler {
643         start: Instant::now(),
644         pipe: host_pipe,
645         read_overlapped_wrapper: overlapped_wrapper,
646         buf: [0; ETHERNET_FRAME_SIZE],
647         write_overlapped_wrapper: OverlappedWrapper::new(true).unwrap(),
648         timer_callbacks: HashMap::new(),
649         #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
650         pcap_writer: PcapWriter::new(BufWriter::with_capacity(
651             PCAP_FILE_BUFFER_SIZE,
652             slirp_captured_packets_file,
653         ))
654         .unwrap(),
655         #[cfg(feature = "slirp-ring-capture")]
656         tx_packet_ring_buffer: PacketRingBuffer::new(PACKET_RING_BUFFER_SIZE_IN_BYTES),
657         #[cfg(feature = "slirp-ring-capture")]
658         rx_packet_ring_buffer: PacketRingBuffer::new(PACKET_RING_BUFFER_SIZE_IN_BYTES),
659         tx_logger: PeriodicLogger::new(MetricEventType::NetworkTxRate, Duration::from_secs(1))
660             .unwrap(),
661         rx_logger: PeriodicLogger::new(MetricEventType::NetworkRxRate, Duration::from_secs(1))
662             .unwrap(),
663     };
664 
665     // Address & mask of the virtual network.
666     let v4_network_addr = Ipv4Addr::new(10, 0, 2, 0);
667     let v4_network_mask = Ipv4Addr::new(255, 255, 255, 0);
668 
669     // Address of the host machine on the virtual network (if the feature is enabled).
670     let host_v4_addr = Ipv4Addr::new(10, 0, 2, 2);
671 
672     // Address of the libslirp provided DNS proxy (packets to this address are intercepted by
673     // libslirp & routed to the first nameserver configured on the machine's NICs by libslirp).
674     let dns_addr = Ipv4Addr::new(10, 0, 2, 3);
675 
676     // DHCP range should start *after* the statically assigned addresses.
677     let dhcp_start_addr = Ipv4Addr::new(10, 0, 2, 4);
678 
679     // IPv6 network address. This is a ULA (unique local address) network, with a randomly generated
680     // ID (0x13624603218). The "prefix" or network address is 64 bits, incorporating both the
681     // network ID, and the subnet (0x0001).
682     let v6_network_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 0);
683 
684     let v6_host_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 2);
685     let v6_dns_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 3);
686     Ok((
687         Context::new(
688             disable_access_to_host,
689             /* IPv4 enabled */
690             true,
691             v4_network_addr,
692             v4_network_mask,
693             host_v4_addr,
694             /* IPv6 enabled */ true,
695             v6_network_addr,
696             /* virtual_network_v6_prefix_len */ 64,
697             /* host_v6_address */ v6_host_addr,
698             /* host_hostname */ None,
699             dhcp_start_addr,
700             dns_addr,
701             /* dns_server_v6_addr */ v6_dns_addr,
702             /* virtual_network_dns_search_domains */ Vec::new(),
703             /* dns_server_domain_name */ None,
704             handler,
705         )?,
706         read_notifier,
707     ))
708 }
709 
#[cfg(test)]
mod tests {
    // Tests for the socket/handle polling helpers and for the Slirp main loop. They rely on
    // Winsock and Windows named pipes, and on items pulled in from the parent module via
    // `use super::*` (e.g. `poll`, `poll_sockets`, `start_slirp`, `Token`, `WSAPOLLFD`).

    use std::net::UdpSocket;
    use std::os::windows::io::AsRawSocket;

    use base::named_pipes;
    use base::named_pipes::BlockingMode;
    use base::named_pipes::FramingMode;

    use super::super::SLIRP_BUFFER_SIZE;
    use super::*;

    /// Maximum number of `poll_sockets` calls `create_readable_socket` makes while waiting for
    /// the freshly written datagram to show up as readable.
    const READABLE_POLL_ATTEMPTS: usize = 5;

    /// Binds a non-blocking UDP socket to an ephemeral loopback port and returns it together
    /// with a `WSAPOLLFD` that requests read readiness for that socket.
    ///
    /// The `WSAPOLLFD` only stores the raw socket handle, so callers must keep the returned
    /// `UdpSocket` alive for as long as the poll descriptor is in use.
    fn create_socket() -> (UdpSocket, WSAPOLLFD) {
        let socket = UdpSocket::bind("127.0.0.1:0").unwrap();
        socket
            .set_nonblocking(true)
            .expect("Socket failed to set non_blocking.");

        let poll_fd = WSAPOLLFD {
            fd: socket.as_raw_socket() as usize,
            events: POLLRDNORM | POLLRDBAND, // POLLIN equivalent
            revents: 0,
        };

        (socket, poll_fd)
    }

    /// Like `create_socket`, but the socket has a datagram queued (sent to itself) and has been
    /// observed as readable by `poll_sockets` before it is returned.
    ///
    /// # Panics
    ///
    /// Panics if the socket is not reported readable within `READABLE_POLL_ATTEMPTS` polls.
    fn create_readable_socket() -> (UdpSocket, WSAPOLLFD) {
        let (socket, poll_fd) = create_socket();
        let receiving_addr = socket.local_addr().unwrap();
        let buf = [0; 10];
        socket.send_to(&buf, receiving_addr).unwrap();

        // Wait for the socket to really be readable before we return it back to the test. We've
        // seen cases in CI where send_to completes, but WSAPoll won't find the socket to be
        // readable.
        let mut sockets = vec![poll_fd];
        for _ in 0..READABLE_POLL_ATTEMPTS {
            sockets = poll_sockets(sockets).expect("poll_sockets failed");
            if sockets[0].revents & (POLLRDNORM | POLLRDBAND) > 0 {
                // Hand back the pristine descriptor (revents == 0), not the polled copy.
                return (socket, poll_fd);
            }
        }
        panic!("socket never became readable");
    }

    /// `poll` returns with no signaled handles and no socket events once the timeout elapses.
    #[test]
    fn test_polling_timeout_works() {
        let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
        let socket_event_handle = Event::new_auto_reset().unwrap();

        // Neither the socket nor the event is made ready, so only the timeout can wake `poll`.
        let (_socket, poll_fd) = create_socket();
        let event_fd = Event::new_auto_reset().unwrap();
        let (handles, sockets) = poll(
            &wait_ctx,
            &socket_event_handle,
            vec![&event_fd],
            vec![poll_fd],
            Some(Duration::from_millis(2)),
        )
        .unwrap();

        // Asserts that we woke up because of a timeout.
        assert_eq!(handles.len(), 0);
        assert_eq!(sockets[0].revents, 0);
    }

    /// `poll` reports a signaled event handle when no sockets are being watched.
    #[test]
    fn test_polling_handle_only() {
        let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
        let socket_event_handle = Event::new_auto_reset().unwrap();

        // Required to ensure winsock is ready (needed by poll).
        let (_sock, _poll_fd) = create_readable_socket();

        let event_fd = Event::new_auto_reset().unwrap();
        event_fd.signal().expect("Failed to write event");
        let (handles, _sockets) = poll(
            &wait_ctx,
            &socket_event_handle,
            vec![&event_fd],
            Vec::new(),
            None,
        )
        .unwrap();

        assert_eq!(handles.len(), 1);
        assert_eq!(handles[0].as_raw_descriptor(), event_fd.as_raw_descriptor());
    }

    /// `poll` returns the watched socket when no event handles are being watched.
    #[test]
    fn test_polling_socket_only() {
        let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
        let socket_event_handle = Event::new_auto_reset().unwrap();

        let (sock, poll_fd) = create_readable_socket();
        let (_handles, sockets) = poll(
            &wait_ctx,
            &socket_event_handle,
            Vec::new(),
            vec![poll_fd],
            None,
        )
        .unwrap();

        assert_eq!(sockets.len(), 1);
        assert_eq!(sockets[0].fd, sock.as_raw_socket() as usize);
    }

    /// `poll` reports both a readable socket and a signaled event handle in a single call.
    #[test]
    fn test_polling_two_notifies() {
        let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
        let socket_event_handle = Event::new_auto_reset().unwrap();

        let (sock, poll_fd) = create_readable_socket();
        let event_fd = Event::new_auto_reset().unwrap();
        event_fd.signal().expect("Failed to write event");

        let (handles, sockets) = poll(
            &wait_ctx,
            &socket_event_handle,
            vec![&event_fd],
            vec![poll_fd],
            None,
        )
        .unwrap();

        assert_eq!(sockets.len(), 1);
        assert_eq!(sockets[0].fd, sock.as_raw_socket() as usize);

        assert_eq!(handles.len(), 1);
        assert_eq!(handles[0].as_raw_descriptor(), event_fd.as_raw_descriptor());
    }

    /// `start_slirp` returns successfully when its shutdown event is already signaled.
    #[test]
    fn test_slirp_stops_on_shutdown() {
        let event_fd = Event::new_auto_reset().unwrap();
        // `_guest_pipe` is never used, but it is kept bound so the peer end of the pipe stays
        // open while Slirp runs.
        let (host_pipe, _guest_pipe) = named_pipes::pair_with_buffer_size(
            &FramingMode::Message,
            &BlockingMode::Wait,
            0,
            SLIRP_BUFFER_SIZE,
            true,
        )
        .unwrap();
        // Signal shutdown up front; the call below is expected to return rather than block.
        event_fd.signal().expect("Failed to write event");
        start_slirp(
            host_pipe,
            event_fd.try_clone().unwrap(),
            /* disable_access_to_host=*/ false,
            #[cfg(feature = "slirp-ring-capture")]
            None,
        )
        .expect("Failed to start slirp");
    }

    // A gratuitous ARP from 52:55:0A:00:02:0F for IP 10.0.2.15
    const VETH_ARP_ANNOUNCEMENT: [u8; 54] = [
        // VETH header
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // Ethernet frame
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x08, 0x06, 0x00,
        0x01, 0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x0a, 0x00,
        0x02, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0x02, 0x0f,
    ];

    // TCP SYN from 52:55:0A:00:02:0F to 52:55:0A:00:02:01
    // IP 10.0.2.15(5678) -> 127.0.0.1(19422)
    // Note: MAC addresses in Slirp are arbitrary, so the destination MAC could be anything.
    const VETH_TCP_SYN: [u8; 66] = [
        // VETH header
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // Ethernet frame
        0x52, 0x55, 0x0a, 0x00, 0x02, 0x01, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x08, 0x00, 0x45,
        0x00, 0x00, 0x28, 0x12, 0x34, 0x40, 0x00, 0xff, 0x06, 0xde, 0x8b, 0x0a, 0x00, 0x02, 0x0f,
        0x7f, 0x00, 0x00, 0x01, 0x16, 0x2e, 0x4b, 0xde, 0x00, 0x00, 0x04, 0xd2, 0x00, 0x00, 0x0d,
        0x80, 0x50, 0x02, 0x0f, 0xa0, 0xa0, 0xd4, 0x00, 0x00,
    ];

    // This is built into the TCP_SYN packet above; changing it will require a change to the TCP
    // checksum
    const LOOPBACK_SOCKET: &str = "127.0.0.1:19422";

    // How long `test_send_tcp_syn` lets Slirp run before its watchdog thread signals shutdown.
    const TIMEOUT_MILLIS: u64 = 400;

    /// Sends an ARP announcement followed by a TCP SYN through the guest pipe and checks that
    /// Slirp answers with a SYN+ACK frame coming from the local listener on `LOOPBACK_SOCKET`.
    #[test]
    fn test_send_tcp_syn() {
        use std::net::TcpListener;
        use std::thread;

        let (mut guest_pipe, host_pipe) = named_pipes::pair_with_buffer_size(
            &FramingMode::Message,
            &BlockingMode::Wait,
            0,
            SLIRP_BUFFER_SIZE,
            true,
        )
        .unwrap();
        let mut overlapped_wrapper = OverlappedWrapper::new(true).unwrap();

        // Start Slirp in another thread
        let shutdown_sender = Event::new_auto_reset().unwrap();
        let shutdown_receiver = shutdown_sender.try_clone().unwrap();

        // Run the slirp handling in a background thread
        thread::spawn(move || {
            start_slirp(
                host_pipe,
                shutdown_receiver,
                /* disable_access_to_host=*/ false,
                #[cfg(feature = "slirp-ring-capture")]
                None,
            )
            .unwrap();
        });

        // Create a timeout thread so the test doesn't block forever if something is amiss
        thread::spawn(move || {
            thread::sleep(Duration::from_millis(TIMEOUT_MILLIS));
            shutdown_sender
                .signal()
                .expect("Failed to write to shutdown sender");
        });

        // Start a local TCP server for our Slirp to connect to
        let _listener = TcpListener::bind(LOOPBACK_SOCKET).unwrap();

        // This ARP is required or else Slirp will send us an ARP request before it returns an ACK
        // SAFETY: safe because the buffer & overlapped wrapper are in scope for
        // the duration of the overlapped operation.
        unsafe {
            guest_pipe
                .write_overlapped(&VETH_ARP_ANNOUNCEMENT, &mut overlapped_wrapper)
                .expect("Failed to write ARP to guest pipe");
        }
        guest_pipe
            .get_overlapped_result(&mut overlapped_wrapper)
            .unwrap();
        // SAFETY: safe because the buffer & overlapped wrapper are in scope for
        // the duration of the overlapped operation.
        unsafe {
            guest_pipe
                .write_overlapped(&VETH_TCP_SYN, &mut overlapped_wrapper)
                .expect("Failed to write SYN to guest pipe")
        };
        guest_pipe
            .get_overlapped_result(&mut overlapped_wrapper)
            .unwrap();

        let mut recv_buffer: [u8; 512] = [0; 512];
        // SAFETY: safe because the buffer & overlapped wrapper are in scope for
        // the duration of the overlapped operation.
        unsafe { guest_pipe.read_overlapped(&mut recv_buffer, &mut overlapped_wrapper) }.unwrap();
        let size = guest_pipe
            .get_overlapped_result(&mut overlapped_wrapper)
            .unwrap() as usize;

        // This output is printed to aid in debugging; it can be parsed with https://hpd.gasmi.net/
        println!("Received frame:");
        for byte in &recv_buffer[..size] {
            print!("{:02x} ", byte);
        }
        println!();

        // This test expects a VETH header + SYN+ACK response. It doesn't inspect every byte of
        // the response frame because some fields may be dependent on the host or OS.
        assert_eq!(size, VETH_HEADER_LENGTH + 58);

        // Strip off the VETH header and ignore it. From here on, offsets into `recv_buffer` are
        // relative to the start of the Ethernet frame.
        recv_buffer.copy_within(VETH_HEADER_LENGTH.., 0);

        // Check Ethernet header: destination MAC, source MAC, EtherType (0x0800).
        const ETH_RESPONSE_HEADER: [u8; 14] = [
            0x52, 0x55, 0x0A, 0x00, 0x02, 0x0F, 0x52, 0x55, 0x0A, 0x00, 0x02, 0x02, 0x08, 0x00,
        ];
        assert_eq!(
            recv_buffer[0..ETH_RESPONSE_HEADER.len()],
            ETH_RESPONSE_HEADER
        );

        // Check source IP
        assert_eq!(recv_buffer[26..=29], [0x7f, 0x00, 0x00, 0x01]); // 127.0.0.1

        // Check dest IP
        assert_eq!(recv_buffer[30..=33], [0x0A, 0x00, 0x02, 0x0F]); // 10.0.2.15

        // Check source port
        assert_eq!(recv_buffer[34..=35], [0x4b, 0xde]); // 19422

        // Check destination port
        assert_eq!(recv_buffer[36..=37], [0x16, 0x2e]); // 5678

        // Check TCP flags are SYN+ACK
        assert_eq!(recv_buffer[47], 0x12);
    }
}
1005