• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![unstable(issue = "none", feature = "windows_stdio")]
2 
3 use crate::cmp;
4 use crate::io;
5 use crate::mem::MaybeUninit;
6 use crate::os::windows::io::{FromRawHandle, IntoRawHandle};
7 use crate::ptr;
8 use crate::str;
9 use crate::sys::c;
10 use crate::sys::cvt;
11 use crate::sys::handle::Handle;
12 use core::str::utf8_char_width;
13 
14 #[cfg(test)]
15 mod tests;
16 
17 // Don't cache handles but get them fresh for every read/write. This allows us to track changes to
18 // the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
// Reader state for standard input. No handle is stored here; only decoding state that has to
// survive between two `read` calls on a console.
pub struct Stdin {
    // Unpaired high surrogate (0xD800..=0xDBFF) that ended the previous console read, kept so it
    // can be joined with the next code unit. `0` means nothing is pending.
    surrogate: u16,
    // UTF-8 bytes that were already decoded from the console but did not fit into the caller's
    // buffer; they are handed out first on the next read.
    incomplete_utf8: IncompleteUtf8,
}
23 
// Writer state for standard output.
pub struct Stdout {
    // Leading bytes of a multi-byte UTF-8 sequence that was split across console `write` calls;
    // completed byte by byte by the following writes.
    incomplete_utf8: IncompleteUtf8,
}
27 
// Writer state for standard error.
pub struct Stderr {
    // Leading bytes of a multi-byte UTF-8 sequence that was split across console `write` calls;
    // completed byte by byte by the following writes.
    incomplete_utf8: IncompleteUtf8,
}
31 
// Small fixed-capacity byte store for one partially transferred UTF-8 sequence.
struct IncompleteUtf8 {
    // Storage; four bytes is the longest encoding of a single UTF-8 code point.
    bytes: [u8; 4],
    // Number of leading entries of `bytes` that currently hold data.
    len: u8,
}
36 
37 impl IncompleteUtf8 {
38     // Implemented for use in Stdin::read.
read(&mut self, buf: &mut [u8]) -> usize39     fn read(&mut self, buf: &mut [u8]) -> usize {
40         // Write to buffer until the buffer is full or we run out of bytes.
41         let to_write = cmp::min(buf.len(), self.len as usize);
42         buf[..to_write].copy_from_slice(&self.bytes[..to_write]);
43 
44         // Rotate the remaining bytes if not enough remaining space in buffer.
45         if usize::from(self.len) > buf.len() {
46             self.bytes.copy_within(to_write.., 0);
47             self.len -= to_write as u8;
48         } else {
49             self.len = 0;
50         }
51 
52         to_write
53     }
54 }
55 
// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
// #13304 for details).
//
// From MSDN (2011): "The storage for this buffer is allocated from a shared heap for the
// process that is 64 KB in size. The maximum size of the buffer will depend on heap usage."
//
// We choose the cap at 8 KiB because libuv does the same, and it seems to be acceptable so far.
//
// The value is a size in bytes: the UTF-16 scratch buffers in this file are declared with
// `MAX_BUFFER_SIZE / 2` `u16` elements.
const MAX_BUFFER_SIZE: usize = 8192;
64 
// The standard buffer size of BufReader for Stdin should be able to hold 3x more bytes than there
// are `u16`'s in MAX_BUFFER_SIZE. This ensures the read data can always be completely decoded from
// UTF-16 to UTF-8.
//
// With MAX_BUFFER_SIZE = 8192 this is 4096 `u16`s * 3 bytes = 12288 bytes.
pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3;
69 
get_handle(handle_id: c::DWORD) -> io::Result<c::HANDLE>70 pub fn get_handle(handle_id: c::DWORD) -> io::Result<c::HANDLE> {
71     let handle = unsafe { c::GetStdHandle(handle_id) };
72     if handle == c::INVALID_HANDLE_VALUE {
73         Err(io::Error::last_os_error())
74     } else if handle.is_null() {
75         Err(io::Error::from_raw_os_error(c::ERROR_INVALID_HANDLE as i32))
76     } else {
77         Ok(handle)
78     }
79 }
80 
is_console(handle: c::HANDLE) -> bool81 fn is_console(handle: c::HANDLE) -> bool {
82     // `GetConsoleMode` will return false (0) if this is a pipe (we don't care about the reported
83     // mode). This will only detect Windows Console, not other terminals connected to a pipe like
84     // MSYS. Which is exactly what we need, as only Windows Console needs a conversion to UTF-16.
85     let mut mode = 0;
86     unsafe { c::GetConsoleMode(handle, &mut mode) != 0 }
87 }
88 
write( handle_id: c::DWORD, data: &[u8], incomplete_utf8: &mut IncompleteUtf8, ) -> io::Result<usize>89 fn write(
90     handle_id: c::DWORD,
91     data: &[u8],
92     incomplete_utf8: &mut IncompleteUtf8,
93 ) -> io::Result<usize> {
94     if data.is_empty() {
95         return Ok(0);
96     }
97 
98     let handle = get_handle(handle_id)?;
99     if !is_console(handle) {
100         unsafe {
101             let handle = Handle::from_raw_handle(handle);
102             let ret = handle.write(data);
103             handle.into_raw_handle(); // Don't close the handle
104             return ret;
105         }
106     }
107 
108     if incomplete_utf8.len > 0 {
109         assert!(
110             incomplete_utf8.len < 4,
111             "Unexpected number of bytes for incomplete UTF-8 codepoint."
112         );
113         if data[0] >> 6 != 0b10 {
114             // not a continuation byte - reject
115             incomplete_utf8.len = 0;
116             return Err(io::const_io_error!(
117                 io::ErrorKind::InvalidData,
118                 "Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
119             ));
120         }
121         incomplete_utf8.bytes[incomplete_utf8.len as usize] = data[0];
122         incomplete_utf8.len += 1;
123         let char_width = utf8_char_width(incomplete_utf8.bytes[0]);
124         if (incomplete_utf8.len as usize) < char_width {
125             // more bytes needed
126             return Ok(1);
127         }
128         let s = str::from_utf8(&incomplete_utf8.bytes[0..incomplete_utf8.len as usize]);
129         incomplete_utf8.len = 0;
130         match s {
131             Ok(s) => {
132                 assert_eq!(char_width, s.len());
133                 let written = write_valid_utf8_to_console(handle, s)?;
134                 assert_eq!(written, s.len()); // guaranteed by write_valid_utf8_to_console() for single codepoint writes
135                 return Ok(1);
136             }
137             Err(_) => {
138                 return Err(io::const_io_error!(
139                     io::ErrorKind::InvalidData,
140                     "Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
141                 ));
142             }
143         }
144     }
145 
146     // As the console is meant for presenting text, we assume bytes of `data` are encoded as UTF-8,
147     // which needs to be encoded as UTF-16.
148     //
149     // If the data is not valid UTF-8 we write out as many bytes as are valid.
150     // If the first byte is invalid it is either first byte of a multi-byte sequence but the
151     // provided byte slice is too short or it is the first byte of an invalid multi-byte sequence.
152     let len = cmp::min(data.len(), MAX_BUFFER_SIZE / 2);
153     let utf8 = match str::from_utf8(&data[..len]) {
154         Ok(s) => s,
155         Err(ref e) if e.valid_up_to() == 0 => {
156             let first_byte_char_width = utf8_char_width(data[0]);
157             if first_byte_char_width > 1 && data.len() < first_byte_char_width {
158                 incomplete_utf8.bytes[0] = data[0];
159                 incomplete_utf8.len = 1;
160                 return Ok(1);
161             } else {
162                 return Err(io::const_io_error!(
163                     io::ErrorKind::InvalidData,
164                     "Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
165                 ));
166             }
167         }
168         Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
169     };
170 
171     write_valid_utf8_to_console(handle, utf8)
172 }
173 
write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usize>174 fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usize> {
175     debug_assert!(!utf8.is_empty());
176 
177     let mut utf16 = [MaybeUninit::<u16>::uninit(); MAX_BUFFER_SIZE / 2];
178     let utf8 = &utf8[..utf8.floor_char_boundary(utf16.len())];
179 
180     let utf16: &[u16] = unsafe {
181         // Note that this theoretically checks validity twice in the (most common) case
182         // where the underlying byte sequence is valid utf-8 (given the check in `write()`).
183         let result = c::MultiByteToWideChar(
184             c::CP_UTF8,                      // CodePage
185             c::MB_ERR_INVALID_CHARS,         // dwFlags
186             utf8.as_ptr(),                   // lpMultiByteStr
187             utf8.len() as c::c_int,          // cbMultiByte
188             utf16.as_mut_ptr() as c::LPWSTR, // lpWideCharStr
189             utf16.len() as c::c_int,         // cchWideChar
190         );
191         assert!(result != 0, "Unexpected error in MultiByteToWideChar");
192 
193         // Safety: MultiByteToWideChar initializes `result` values.
194         MaybeUninit::slice_assume_init_ref(&utf16[..result as usize])
195     };
196 
197     let mut written = write_u16s(handle, &utf16)?;
198 
199     // Figure out how many bytes of as UTF-8 were written away as UTF-16.
200     if written == utf16.len() {
201         Ok(utf8.len())
202     } else {
203         // Make sure we didn't end up writing only half of a surrogate pair (even though the chance
204         // is tiny). Because it is not possible for user code to re-slice `data` in such a way that
205         // a missing surrogate can be produced (and also because of the UTF-8 validation above),
206         // write the missing surrogate out now.
207         // Buffering it would mean we have to lie about the number of bytes written.
208         let first_code_unit_remaining = utf16[written];
209         if first_code_unit_remaining >= 0xDCEE && first_code_unit_remaining <= 0xDFFF {
210             // low surrogate
211             // We just hope this works, and give up otherwise
212             let _ = write_u16s(handle, &utf16[written..written + 1]);
213             written += 1;
214         }
215         // Calculate the number of bytes of `utf8` that were actually written.
216         let mut count = 0;
217         for ch in utf16[..written].iter() {
218             count += match ch {
219                 0x0000..=0x007F => 1,
220                 0x0080..=0x07FF => 2,
221                 0xDCEE..=0xDFFF => 1, // Low surrogate. We already counted 3 bytes for the other.
222                 _ => 3,
223             };
224         }
225         debug_assert!(String::from_utf16(&utf16[..written]).unwrap() == utf8[..count]);
226         Ok(count)
227     }
228 }
229 
write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize>230 fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
231     debug_assert!(data.len() < u32::MAX as usize);
232     let mut written = 0;
233     cvt(unsafe {
234         c::WriteConsoleW(
235             handle,
236             data.as_ptr() as c::LPCVOID,
237             data.len() as u32,
238             &mut written,
239             ptr::null_mut(),
240         )
241     })?;
242     Ok(written as usize)
243 }
244 
245 impl Stdin {
new() -> Stdin246     pub const fn new() -> Stdin {
247         Stdin { surrogate: 0, incomplete_utf8: IncompleteUtf8::new() }
248     }
249 }
250 
251 impl io::Read for Stdin {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>252     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
253         let handle = get_handle(c::STD_INPUT_HANDLE)?;
254         if !is_console(handle) {
255             unsafe {
256                 let handle = Handle::from_raw_handle(handle);
257                 let ret = handle.read(buf);
258                 handle.into_raw_handle(); // Don't close the handle
259                 return ret;
260             }
261         }
262 
263         // If there are bytes in the incomplete utf-8, start with those.
264         // (No-op if there is nothing in the buffer.)
265         let mut bytes_copied = self.incomplete_utf8.read(buf);
266 
267         if bytes_copied == buf.len() {
268             return Ok(bytes_copied);
269         } else if buf.len() - bytes_copied < 4 {
270             // Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
271             let mut utf16_buf = [MaybeUninit::new(0); 1];
272             // Read one u16 character.
273             let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
274             // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
275             let read_bytes = utf16_to_utf8(
276                 unsafe { MaybeUninit::slice_assume_init_ref(&utf16_buf[..read]) },
277                 &mut self.incomplete_utf8.bytes,
278             )?;
279 
280             // Read in the bytes from incomplete_utf8 until the buffer is full.
281             self.incomplete_utf8.len = read_bytes as u8;
282             // No-op if no bytes.
283             bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
284             Ok(bytes_copied)
285         } else {
286             let mut utf16_buf = [MaybeUninit::<u16>::uninit(); MAX_BUFFER_SIZE / 2];
287 
288             // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
289             // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
290             // lost.
291             let amount = cmp::min(buf.len() / 3, utf16_buf.len());
292             let read =
293                 read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
294             // Safety `read_u16s_fixup_surrogates` returns the number of items
295             // initialized.
296             let utf16s = unsafe { MaybeUninit::slice_assume_init_ref(&utf16_buf[..read]) };
297             match utf16_to_utf8(utf16s, buf) {
298                 Ok(value) => return Ok(bytes_copied + value),
299                 Err(e) => return Err(e),
300             }
301         }
302     }
303 }
304 
305 // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
306 // buffer size, and keep it around for the next read hoping to put them together.
307 // This is a best effort, and might not work if we are not the only reader on Stdin.
read_u16s_fixup_surrogates( handle: c::HANDLE, buf: &mut [MaybeUninit<u16>], mut amount: usize, surrogate: &mut u16, ) -> io::Result<usize>308 fn read_u16s_fixup_surrogates(
309     handle: c::HANDLE,
310     buf: &mut [MaybeUninit<u16>],
311     mut amount: usize,
312     surrogate: &mut u16,
313 ) -> io::Result<usize> {
314     // Insert possibly remaining unpaired surrogate from last read.
315     let mut start = 0;
316     if *surrogate != 0 {
317         buf[0] = MaybeUninit::new(*surrogate);
318         *surrogate = 0;
319         start = 1;
320         if amount == 1 {
321             // Special case: `Stdin::read` guarantees we can always read at least one new `u16`
322             // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least
323             // 4 bytes.
324             amount = 2;
325         }
326     }
327     let mut amount = read_u16s(handle, &mut buf[start..amount])? + start;
328 
329     if amount > 0 {
330         // Safety: The returned `amount` is the number of values initialized,
331         // and it is not 0, so we know that `buf[amount - 1]` have been
332         // initialized.
333         let last_char = unsafe { buf[amount - 1].assume_init() };
334         if last_char >= 0xD800 && last_char <= 0xDBFF {
335             // high surrogate
336             *surrogate = last_char;
337             amount -= 1;
338         }
339     }
340     Ok(amount)
341 }
342 
// Reads UTF-16 code units from the console `handle` into `buf` with `ReadConsoleW`.
//
// Returns `Ok(n)` if it initialized `n` values in `buf`. A Ctrl-Z at the very end of the data is
// not included in `n`. A read that was interrupted by Ctrl-C or Ctrl-Break is retried.
fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit<u16>]) -> io::Result<usize> {
    // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the
    // traditional DOS method to indicate end of character stream / user input (SUB).
    // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole.
    const CTRL_Z: u16 = 0x1A;
    // Bit number 0x1A set: the wakeup mask carries one bit per control character.
    const CTRL_Z_MASK: c::ULONG = 1 << CTRL_Z;
    let input_control = c::CONSOLE_READCONSOLE_CONTROL {
        nLength: crate::mem::size_of::<c::CONSOLE_READCONSOLE_CONTROL>() as c::ULONG,
        nInitialChars: 0,
        dwCtrlWakeupMask: CTRL_Z_MASK,
        dwControlKeyState: 0,
    };

    // Receives the number of code units the console stored in `buf`.
    let mut amount = 0;
    loop {
        cvt(unsafe {
            // Clear the thread's last-error value first so that the check below only sees an
            // error set by this very call.
            c::SetLastError(0);
            c::ReadConsoleW(
                handle,
                buf.as_mut_ptr() as c::LPVOID,
                buf.len() as u32,
                &mut amount,
                &input_control,
            )
        })?;

        // ReadConsoleW returns success with ERROR_OPERATION_ABORTED for Ctrl-C or Ctrl-Break.
        // Explicitly check for that case here and try again.
        if amount == 0 && unsafe { c::GetLastError() } == c::ERROR_OPERATION_ABORTED {
            continue;
        }
        break;
    }
    // Safety: if `amount > 0`, then that many `u16` values were written, so
    // `buf[amount as usize - 1]` has been initialized.
    if amount > 0 && unsafe { buf[amount as usize - 1].assume_init() } == CTRL_Z {
        // Do not report the trailing Ctrl-Z to the caller.
        amount -= 1;
    }
    Ok(amount as usize)
}
384 
utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize>385 fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
386     debug_assert!(utf16.len() <= c::c_int::MAX as usize);
387     debug_assert!(utf8.len() <= c::c_int::MAX as usize);
388 
389     if utf16.is_empty() {
390         return Ok(0);
391     }
392 
393     let result = unsafe {
394         c::WideCharToMultiByte(
395             c::CP_UTF8,              // CodePage
396             c::WC_ERR_INVALID_CHARS, // dwFlags
397             utf16.as_ptr(),          // lpWideCharStr
398             utf16.len() as c::c_int, // cchWideChar
399             utf8.as_mut_ptr(),       // lpMultiByteStr
400             utf8.len() as c::c_int,  // cbMultiByte
401             ptr::null(),             // lpDefaultChar
402             ptr::null_mut(),         // lpUsedDefaultChar
403         )
404     };
405     if result == 0 {
406         // We can't really do any better than forget all data and return an error.
407         Err(io::const_io_error!(
408             io::ErrorKind::InvalidData,
409             "Windows stdin in console mode does not support non-UTF-16 input; \
410             encountered unpaired surrogate",
411         ))
412     } else {
413         Ok(result as usize)
414     }
415 }
416 
417 impl IncompleteUtf8 {
new() -> IncompleteUtf8418     pub const fn new() -> IncompleteUtf8 {
419         IncompleteUtf8 { bytes: [0; 4], len: 0 }
420     }
421 }
422 
423 impl Stdout {
new() -> Stdout424     pub const fn new() -> Stdout {
425         Stdout { incomplete_utf8: IncompleteUtf8::new() }
426     }
427 }
428 
429 impl io::Write for Stdout {
write(&mut self, buf: &[u8]) -> io::Result<usize>430     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
431         write(c::STD_OUTPUT_HANDLE, buf, &mut self.incomplete_utf8)
432     }
433 
flush(&mut self) -> io::Result<()>434     fn flush(&mut self) -> io::Result<()> {
435         Ok(())
436     }
437 }
438 
439 impl Stderr {
new() -> Stderr440     pub const fn new() -> Stderr {
441         Stderr { incomplete_utf8: IncompleteUtf8::new() }
442     }
443 }
444 
445 impl io::Write for Stderr {
write(&mut self, buf: &[u8]) -> io::Result<usize>446     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
447         write(c::STD_ERROR_HANDLE, buf, &mut self.incomplete_utf8)
448     }
449 
flush(&mut self) -> io::Result<()>450     fn flush(&mut self) -> io::Result<()> {
451         Ok(())
452     }
453 }
454 
is_ebadf(err: &io::Error) -> bool455 pub fn is_ebadf(err: &io::Error) -> bool {
456     err.raw_os_error() == Some(c::ERROR_INVALID_HANDLE as i32)
457 }
458 
panic_output() -> Option<impl io::Write>459 pub fn panic_output() -> Option<impl io::Write> {
460     Some(Stderr::new())
461 }
462