1 // These functions are necessarily inefficient, because they must revert
2 // encoding conversions performed by the standard library. However, there is
3 // currently no better alternative.
4
5 use std::borrow::Cow;
6 use std::error::Error;
7 use std::ffi::OsStr;
8 use std::ffi::OsString;
9 use std::fmt;
10 use std::fmt::Display;
11 use std::fmt::Formatter;
12 use std::ops::Not;
13 use std::os::windows::ffi::OsStrExt;
14 use std::os::windows::ffi::OsStringExt;
15 use std::result;
16 use std::str;
17
18 if_raw_str! {
19 pub(super) mod raw;
20 }
21
22 mod wtf8;
23 use wtf8::DecodeWide;
24
25 #[cfg(test)]
26 mod tests;
27
28 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
29 pub(super) enum EncodingError {
30 Byte(u8),
31 CodePoint(u32),
32 End(),
33 }
34
35 impl EncodingError {
position(&self) -> Cow<'_, str>36 fn position(&self) -> Cow<'_, str> {
37 match self {
38 Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)),
39 Self::CodePoint(code_point) => {
40 Cow::Owned(format!("code point U+{:04X}", code_point))
41 }
42 Self::End() => Cow::Borrowed("end of string"),
43 }
44 }
45 }
46
47 impl Display for EncodingError {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result48 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
49 write!(
50 f,
51 "byte sequence is not representable in the platform encoding; \
52 error at {}",
53 self.position(),
54 )
55 }
56 }
57
58 impl Error for EncodingError {}
59
60 type Result<T> = result::Result<T, EncodingError>;
61
from_bytes(string: &[u8]) -> Result<Option<OsString>>62 fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
63 let mut encoder = wtf8::encode_wide(string);
64
65 // Collecting an iterator into a result ignores the size hint:
66 // https://github.com/rust-lang/rust/issues/48994
67 let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
68 for wchar in &mut encoder {
69 encoded_string.push(wchar?);
70 }
71
72 debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8());
73 Ok(encoder
74 .is_still_utf8()
75 .not()
76 .then(|| OsStringExt::from_wide(&encoded_string)))
77 }
78
to_bytes(os_string: &OsStr) -> Vec<u8>79 fn to_bytes(os_string: &OsStr) -> Vec<u8> {
80 let encoder = OsStrExt::encode_wide(os_string);
81
82 let mut string = Vec::with_capacity(encoder.size_hint().0);
83 string.extend(DecodeWide::new(encoder));
84 string
85 }
86
os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>>87 pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
88 from_bytes(string).map(|os_string| {
89 os_string.map(Cow::Owned).unwrap_or_else(|| {
90 // SAFETY: This slice was validated to be UTF-8.
91 Cow::Borrowed(OsStr::new(unsafe {
92 str::from_utf8_unchecked(string)
93 }))
94 })
95 })
96 }
97
os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]>98 pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
99 Cow::Owned(to_bytes(os_string))
100 }
101
os_string_from_vec(string: Vec<u8>) -> Result<OsString>102 pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
103 from_bytes(&string).map(|os_string| {
104 os_string.unwrap_or_else(|| {
105 // SAFETY: This slice was validated to be UTF-8.
106 unsafe { String::from_utf8_unchecked(string) }.into()
107 })
108 })
109 }
110
os_string_into_vec(os_string: OsString) -> Vec<u8>111 pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
112 to_bytes(&os_string)
113 }
114