// These functions are necessarily inefficient, because they must revert
// encoding conversions performed by the standard library. However, there is
// currently no better alternative.
4 
5 use std::borrow::Cow;
6 use std::error::Error;
7 use std::ffi::OsStr;
8 use std::ffi::OsString;
9 use std::fmt;
10 use std::fmt::Display;
11 use std::fmt::Formatter;
12 use std::ops::Not;
13 use std::os::windows::ffi::OsStrExt;
14 use std::os::windows::ffi::OsStringExt;
15 use std::result;
16 use std::str;
17 
18 if_raw_str! {
19     pub(super) mod raw;
20 }
21 
22 mod wtf8;
23 use wtf8::DecodeWide;
24 
25 #[cfg(test)]
26 mod tests;
27 
/// Reason a byte sequence could not be represented in the platform encoding.
///
/// Each variant identifies the position that is reported in the error
/// message produced by the `Display` implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum EncodingError {
    /// The error is reported at this byte (displayed as `b'\xNN'`).
    Byte(u8),
    /// The error is reported at this code point (displayed as `U+NNNN`).
    CodePoint(u32),
    /// The error is reported at the end of the string.
    End(),
}
34 
35 impl EncodingError {
position(&self) -> Cow<'_, str>36     fn position(&self) -> Cow<'_, str> {
37         match self {
38             Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)),
39             Self::CodePoint(code_point) => {
40                 Cow::Owned(format!("code point U+{:04X}", code_point))
41             }
42             Self::End() => Cow::Borrowed("end of string"),
43         }
44     }
45 }
46 
47 impl Display for EncodingError {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result48     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
49         write!(
50             f,
51             "byte sequence is not representable in the platform encoding; \
52              error at {}",
53             self.position(),
54         )
55     }
56 }
57 
// Marker implementation; the `Error` trait's default methods are used as-is.
impl Error for EncodingError {}
59 
// Module-local shorthand for results whose error type is `EncodingError`.
type Result<T> = result::Result<T, EncodingError>;
61 
from_bytes(string: &[u8]) -> Result<Option<OsString>>62 fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
63     let mut encoder = wtf8::encode_wide(string);
64 
65     // Collecting an iterator into a result ignores the size hint:
66     // https://github.com/rust-lang/rust/issues/48994
67     let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
68     for wchar in &mut encoder {
69         encoded_string.push(wchar?);
70     }
71 
72     debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8());
73     Ok(encoder
74         .is_still_utf8()
75         .not()
76         .then(|| OsStringExt::from_wide(&encoded_string)))
77 }
78 
to_bytes(os_string: &OsStr) -> Vec<u8>79 fn to_bytes(os_string: &OsStr) -> Vec<u8> {
80     let encoder = OsStrExt::encode_wide(os_string);
81 
82     let mut string = Vec::with_capacity(encoder.size_hint().0);
83     string.extend(DecodeWide::new(encoder));
84     string
85 }
86 
os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>>87 pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
88     from_bytes(string).map(|os_string| {
89         os_string.map(Cow::Owned).unwrap_or_else(|| {
90             // SAFETY: This slice was validated to be UTF-8.
91             Cow::Borrowed(OsStr::new(unsafe {
92                 str::from_utf8_unchecked(string)
93             }))
94         })
95     })
96 }
97 
os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]>98 pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
99     Cow::Owned(to_bytes(os_string))
100 }
101 
os_string_from_vec(string: Vec<u8>) -> Result<OsString>102 pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
103     from_bytes(&string).map(|os_string| {
104         os_string.unwrap_or_else(|| {
105             // SAFETY: This slice was validated to be UTF-8.
106             unsafe { String::from_utf8_unchecked(string) }.into()
107         })
108     })
109 }
110 
os_string_into_vec(os_string: OsString) -> Vec<u8>111 pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
112     to_bytes(&os_string)
113 }
114