1 use std::borrow::Cow;
2 use std::ffi::{OsStr, OsString};
3 use std::path::{Path, PathBuf};
4
5 #[cfg(unix)]
6 use std::os::unix::ffi::{OsStrExt, OsStringExt};
7 #[cfg(windows)]
8 use std::os::windows::ffi::{OsStrExt, OsStringExt};
9
10 use rustc_middle::ty::Ty;
11 use rustc_middle::ty::layout::LayoutOf;
12
13 use crate::*;
14
/// Direction in which path separators (and absolute-path prefixes) are converted
/// when a path crosses the boundary between the host and the interpreted target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathConversion {
    /// The path comes from the host and is handed to the target program.
    HostToTarget,
    /// The path comes from the target program and is used on the host.
    TargetToHost,
}
20
21 #[cfg(unix)]
bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr>22 pub fn bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr> {
23 Ok(OsStr::from_bytes(bytes))
24 }
/// Views a byte slice as an `OsStr`. On non-Unix hosts the bytes must be valid UTF-8;
/// anything else is reported as an "unsupported" interpreter error.
#[cfg(not(unix))]
pub fn bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr> {
    // `bytes` is untrusted, so `from_os_str_bytes_unchecked` is not an option here:
    // go through a checked UTF-8 conversion instead.
    match std::str::from_utf8(bytes) {
        Ok(valid) => Ok(OsStr::new(valid)),
        Err(_) => Err(err_unsup_format!("{:?} is not a valid utf-8 string", bytes).into()),
    }
}
32
33 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
34 pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
35 /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
36 /// the Unix APIs usually handle.
read_os_str_from_c_str<'a>( &'a self, ptr: Pointer<Option<Provenance>>, ) -> InterpResult<'tcx, &'a OsStr> where 'tcx: 'a, 'mir: 'a,37 fn read_os_str_from_c_str<'a>(
38 &'a self,
39 ptr: Pointer<Option<Provenance>>,
40 ) -> InterpResult<'tcx, &'a OsStr>
41 where
42 'tcx: 'a,
43 'mir: 'a,
44 {
45 let this = self.eval_context_ref();
46 let bytes = this.read_c_str(ptr)?;
47 bytes_to_os_str(bytes)
48 }
49
50 /// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
51 /// which is what the Windows APIs usually handle.
read_os_str_from_wide_str<'a>( &'a self, ptr: Pointer<Option<Provenance>>, ) -> InterpResult<'tcx, OsString> where 'tcx: 'a, 'mir: 'a,52 fn read_os_str_from_wide_str<'a>(
53 &'a self,
54 ptr: Pointer<Option<Provenance>>,
55 ) -> InterpResult<'tcx, OsString>
56 where
57 'tcx: 'a,
58 'mir: 'a,
59 {
60 #[cfg(windows)]
61 pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
62 Ok(OsString::from_wide(&u16_vec[..]))
63 }
64 #[cfg(not(windows))]
65 pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
66 let s = String::from_utf16(&u16_vec[..])
67 .map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
68 Ok(s.into())
69 }
70
71 let u16_vec = self.eval_context_ref().read_wide_str(ptr)?;
72 u16vec_to_osstring(u16_vec)
73 }
74
75 /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
76 /// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
77 /// to write if `size` is not large enough to fit the contents of `os_string` plus a null
78 /// terminator. It returns `Ok((true, length))` if the writing process was successful. The
79 /// string length returned does include the null terminator.
write_os_str_to_c_str( &mut self, os_str: &OsStr, ptr: Pointer<Option<Provenance>>, size: u64, ) -> InterpResult<'tcx, (bool, u64)>80 fn write_os_str_to_c_str(
81 &mut self,
82 os_str: &OsStr,
83 ptr: Pointer<Option<Provenance>>,
84 size: u64,
85 ) -> InterpResult<'tcx, (bool, u64)> {
86 let bytes = os_str.as_os_str_bytes();
87 self.eval_context_mut().write_c_str(bytes, ptr, size)
88 }
89
90 /// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what the
91 /// Windows APIs usually handle.
92 ///
93 /// If `truncate == false` (the usual mode of operation), this function returns `Ok((false,
94 /// length))` without trying to write if `size` is not large enough to fit the contents of
95 /// `os_string` plus a null terminator. It returns `Ok((true, length))` if the writing process
96 /// was successful. The string length returned does include the null terminator. Length is
97 /// measured in units of `u16.`
98 ///
99 /// If `truncate == true`, then in case `size` is not large enough it *will* write the first
100 /// `size.saturating_sub(1)` many items, followed by a null terminator (if `size > 0`).
write_os_str_to_wide_str( &mut self, os_str: &OsStr, ptr: Pointer<Option<Provenance>>, size: u64, truncate: bool, ) -> InterpResult<'tcx, (bool, u64)>101 fn write_os_str_to_wide_str(
102 &mut self,
103 os_str: &OsStr,
104 ptr: Pointer<Option<Provenance>>,
105 size: u64,
106 truncate: bool,
107 ) -> InterpResult<'tcx, (bool, u64)> {
108 #[cfg(windows)]
109 fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
110 Ok(os_str.encode_wide().collect())
111 }
112 #[cfg(not(windows))]
113 fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
114 // On non-Windows platforms the best we can do to transform Vec<u16> from/to OS strings is to do the
115 // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
116 // valid.
117 os_str
118 .to_str()
119 .map(|s| s.encode_utf16().collect())
120 .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
121 }
122
123 let u16_vec = os_str_to_u16vec(os_str)?;
124 let (written, size_needed) = self.eval_context_mut().write_wide_str(&u16_vec, ptr, size)?;
125 if truncate && !written && size > 0 {
126 // Write the truncated part that fits.
127 let truncated_data = &u16_vec[..size.saturating_sub(1).try_into().unwrap()];
128 let (written, written_len) =
129 self.eval_context_mut().write_wide_str(truncated_data, ptr, size)?;
130 assert!(written && written_len == size);
131 }
132 Ok((written, size_needed))
133 }
134
135 /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
alloc_os_str_as_c_str( &mut self, os_str: &OsStr, memkind: MemoryKind<MiriMemoryKind>, ) -> InterpResult<'tcx, Pointer<Option<Provenance>>>136 fn alloc_os_str_as_c_str(
137 &mut self,
138 os_str: &OsStr,
139 memkind: MemoryKind<MiriMemoryKind>,
140 ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
141 let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator.
142 let this = self.eval_context_mut();
143
144 let arg_type = Ty::new_array(this.tcx.tcx,this.tcx.types.u8, size);
145 let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
146 let (written, _) = self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap();
147 assert!(written);
148 Ok(arg_place.ptr)
149 }
150
151 /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`.
alloc_os_str_as_wide_str( &mut self, os_str: &OsStr, memkind: MemoryKind<MiriMemoryKind>, ) -> InterpResult<'tcx, Pointer<Option<Provenance>>>152 fn alloc_os_str_as_wide_str(
153 &mut self,
154 os_str: &OsStr,
155 memkind: MemoryKind<MiriMemoryKind>,
156 ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
157 let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator.
158 let this = self.eval_context_mut();
159
160 let arg_type = Ty::new_array(this.tcx.tcx,this.tcx.types.u16, size);
161 let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
162 let (written, _) =
163 self.write_os_str_to_wide_str(os_str, arg_place.ptr, size, /*truncate*/ false).unwrap();
164 assert!(written);
165 Ok(arg_place.ptr)
166 }
167
168 /// Read a null-terminated sequence of bytes, and perform path separator conversion if needed.
read_path_from_c_str<'a>( &'a self, ptr: Pointer<Option<Provenance>>, ) -> InterpResult<'tcx, Cow<'a, Path>> where 'tcx: 'a, 'mir: 'a,169 fn read_path_from_c_str<'a>(
170 &'a self,
171 ptr: Pointer<Option<Provenance>>,
172 ) -> InterpResult<'tcx, Cow<'a, Path>>
173 where
174 'tcx: 'a,
175 'mir: 'a,
176 {
177 let this = self.eval_context_ref();
178 let os_str = this.read_os_str_from_c_str(ptr)?;
179
180 Ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
181 Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
182 Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
183 })
184 }
185
186 /// Read a null-terminated sequence of `u16`s, and perform path separator conversion if needed.
read_path_from_wide_str( &self, ptr: Pointer<Option<Provenance>>, ) -> InterpResult<'tcx, PathBuf>187 fn read_path_from_wide_str(
188 &self,
189 ptr: Pointer<Option<Provenance>>,
190 ) -> InterpResult<'tcx, PathBuf> {
191 let this = self.eval_context_ref();
192 let os_str = this.read_os_str_from_wide_str(ptr)?;
193
194 Ok(this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into())
195 }
196
197 /// Write a Path to the machine memory (as a null-terminated sequence of bytes),
198 /// adjusting path separators if needed.
write_path_to_c_str( &mut self, path: &Path, ptr: Pointer<Option<Provenance>>, size: u64, ) -> InterpResult<'tcx, (bool, u64)>199 fn write_path_to_c_str(
200 &mut self,
201 path: &Path,
202 ptr: Pointer<Option<Provenance>>,
203 size: u64,
204 ) -> InterpResult<'tcx, (bool, u64)> {
205 let this = self.eval_context_mut();
206 let os_str =
207 this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
208 this.write_os_str_to_c_str(&os_str, ptr, size)
209 }
210
211 /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
212 /// adjusting path separators if needed.
write_path_to_wide_str( &mut self, path: &Path, ptr: Pointer<Option<Provenance>>, size: u64, truncate: bool, ) -> InterpResult<'tcx, (bool, u64)>213 fn write_path_to_wide_str(
214 &mut self,
215 path: &Path,
216 ptr: Pointer<Option<Provenance>>,
217 size: u64,
218 truncate: bool,
219 ) -> InterpResult<'tcx, (bool, u64)> {
220 let this = self.eval_context_mut();
221 let os_str =
222 this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
223 this.write_os_str_to_wide_str(&os_str, ptr, size, truncate)
224 }
225
226 /// Allocate enough memory to store a Path as a null-terminated sequence of bytes,
227 /// adjusting path separators if needed.
alloc_path_as_c_str( &mut self, path: &Path, memkind: MemoryKind<MiriMemoryKind>, ) -> InterpResult<'tcx, Pointer<Option<Provenance>>>228 fn alloc_path_as_c_str(
229 &mut self,
230 path: &Path,
231 memkind: MemoryKind<MiriMemoryKind>,
232 ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
233 let this = self.eval_context_mut();
234 let os_str =
235 this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
236 this.alloc_os_str_as_c_str(&os_str, memkind)
237 }
238
239 /// Allocate enough memory to store a Path as a null-terminated sequence of `u16`s,
240 /// adjusting path separators if needed.
alloc_path_as_wide_str( &mut self, path: &Path, memkind: MemoryKind<MiriMemoryKind>, ) -> InterpResult<'tcx, Pointer<Option<Provenance>>>241 fn alloc_path_as_wide_str(
242 &mut self,
243 path: &Path,
244 memkind: MemoryKind<MiriMemoryKind>,
245 ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
246 let this = self.eval_context_mut();
247 let os_str =
248 this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
249 this.alloc_os_str_as_wide_str(&os_str, memkind)
250 }
251
252 #[allow(clippy::get_first)]
convert_path<'a>( &self, os_str: Cow<'a, OsStr>, direction: PathConversion, ) -> Cow<'a, OsStr>253 fn convert_path<'a>(
254 &self,
255 os_str: Cow<'a, OsStr>,
256 direction: PathConversion,
257 ) -> Cow<'a, OsStr> {
258 let this = self.eval_context_ref();
259 let target_os = &this.tcx.sess.target.os;
260
261 #[cfg(windows)]
262 return if target_os == "windows" {
263 // Windows-on-Windows, all fine.
264 os_str
265 } else {
266 // Unix target, Windows host.
267 let (from, to) = match direction {
268 PathConversion::HostToTarget => ('\\', '/'),
269 PathConversion::TargetToHost => ('/', '\\'),
270 };
271 let mut converted = os_str
272 .encode_wide()
273 .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
274 .collect::<Vec<_>>();
275 // We also have to ensure that absolute paths remain absolute.
276 match direction {
277 PathConversion::HostToTarget => {
278 // If this is an absolute Windows path that starts with a drive letter (`C:/...`
279 // after separator conversion), it would not be considered absolute by Unix
280 // target code.
281 if converted.get(1).copied() == Some(b':' as u16)
282 && converted.get(2).copied() == Some(b'/' as u16)
283 {
284 // We add a `/` at the beginning, to store the absolute Windows
285 // path in something that looks like an absolute Unix path.
286 converted.insert(0, b'/' as u16);
287 }
288 }
289 PathConversion::TargetToHost => {
290 // If the path is `\C:\`, the leading backslash was probably added by the above code
291 // and we should get rid of it again.
292 if converted.get(0).copied() == Some(b'\\' as u16)
293 && converted.get(2).copied() == Some(b':' as u16)
294 && converted.get(3).copied() == Some(b'\\' as u16)
295 {
296 converted.remove(0);
297 }
298 }
299 }
300 Cow::Owned(OsString::from_wide(&converted))
301 };
302 #[cfg(unix)]
303 return if target_os == "windows" {
304 // Windows target, Unix host.
305 let (from, to) = match direction {
306 PathConversion::HostToTarget => (b'/', b'\\'),
307 PathConversion::TargetToHost => (b'\\', b'/'),
308 };
309 let mut converted = os_str
310 .as_bytes()
311 .iter()
312 .map(|&wchar| if wchar == from { to } else { wchar })
313 .collect::<Vec<_>>();
314 // We also have to ensure that absolute paths remain absolute.
315 match direction {
316 PathConversion::HostToTarget => {
317 // If this start withs a `\`, we add `\\?` so it starts with `\\?\` which is
318 // some magic path on Windows that *is* considered absolute.
319 if converted.get(0).copied() == Some(b'\\') {
320 converted.splice(0..0, b"\\\\?".iter().copied());
321 }
322 }
323 PathConversion::TargetToHost => {
324 // If this starts with `//?/`, it was probably produced by the above code and we
325 // remove the `//?` that got added to get the Unix path back out.
326 if converted.get(0).copied() == Some(b'/')
327 && converted.get(1).copied() == Some(b'/')
328 && converted.get(2).copied() == Some(b'?')
329 && converted.get(3).copied() == Some(b'/')
330 {
331 // Remove first 3 characters
332 converted.splice(0..3, std::iter::empty());
333 }
334 }
335 }
336 Cow::Owned(OsString::from_vec(converted))
337 } else {
338 // Unix-on-Unix, all is fine.
339 os_str
340 };
341 }
342 }
343