1 //! This crate allows interacting with the data stored by [`OsStr`] and
2 //! [`OsString`], without resorting to panics or corruption for invalid UTF-8.
3 //! Thus, methods can be used that are already defined on [`[u8]`][slice] and
4 //! [`Vec<u8>`].
5 //!
6 //! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`]
7 //! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which
8 //! requires the bytes to be valid in UTF-8. However, since this crate makes
9 //! conversions directly between the platform encoding and raw bytes, even some
10 //! strings invalid in UTF-8 can be converted.
11 //!
12 //! In most cases, [`RawOsStr`] and [`RawOsString`] should be used.
13 //! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are
14 //! easier to misuse.
15 //!
16 //! # Encoding
17 //!
18 //! The encoding of bytes returned or accepted by methods of this crate is
19 //! intentionally left unspecified. It may vary for different platforms, so
20 //! defining it would run contrary to the goal of generic string handling.
21 //! However, the following invariants will always be upheld:
22 //!
23 //! - The encoding will be compatible with UTF-8. In particular, splitting an
24 //! encoded byte sequence by a UTF-8–encoded character always produces
25 //! other valid byte sequences. They can be re-encoded without error using
26 //! [`RawOsString::into_os_string`] and similar methods.
27 //!
28 //! - All characters valid in platform strings are representable. [`OsStr`] and
29 //! [`OsString`] can always be losslessly reconstructed from extracted bytes.
30 //!
31 //! Note that the chosen encoding may not match how Rust stores these strings
32 //! internally, which is undocumented. For instance, the result of calling
33 //! [`OsStr::len`] will not necessarily match the number of bytes this crate
34 //! uses to represent the same string.
35 //!
36 //! Additionally, concatenation may yield unexpected results without a UTF-8
37 //! separator. If two platform strings need to be concatenated, the only safe
38 //! way to do so is using [`OsString::push`]. This limitation also makes it
39 //! undesirable to use the bytes in interchange.
40 //!
41 //! Since this encoding can change between versions and platforms, it should
42 //! not be used for storage. The standard library provides implementations of
43 //! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be
44 //! preferred for that use case.
45 //!
46 //! # User Input
47 //!
48 //! Traits in this crate should ideally not be used to convert byte sequences
49 //! that did not originate from [`OsStr`] or a related struct. The encoding
50 //! used by this crate is an implementation detail, so it does not make sense
51 //! to expose it to users.
52 //!
53 //! Crate [bstr] offers some useful alternative methods, such as
54 //! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant
55 //! for user input. But, they reject some byte sequences used to represent
56 //! valid platform strings, which would be undesirable for reliable path
57 //! handling. They are best used only when accepting unknown input.
58 //!
59 //! This crate is meant to help when you already have an instance of [`OsStr`]
60 //! and need to modify the data in a lossless way.
61 //!
62 //! # Features
63 //!
64 //! These features are optional and can be enabled or disabled in a
65 //! "Cargo.toml" file.
66 //!
67 //! ### Default Features
68 //!
//! - **memchr** -
//!   Changes the implementation to use crate [memchr] for better performance.
//!   This feature is useless when "raw\_os\_str" is disabled.
//!
//!   For more information, see [`RawOsStr`][memchr complexity].
//!
//! - **raw\_os\_str** -
//!   Provides:
//!   - [`iter`]
//!   - [`Pattern`]
//!   - [`RawOsStr`]
//!   - [`RawOsStrCow`]
//!   - [`RawOsString`]
//!
//! ### Optional Features
//!
//! - **checked\_conversions** -
//!   Provides:
//!   - [`EncodingError`]
//!   - [`OsStrBytes::from_raw_bytes`]
//!   - [`OsStringBytes::from_raw_vec`]
//!   - [`RawOsStr::from_raw_bytes`]
//!   - [`RawOsString::from_raw_vec`]
//!
//!   Because this feature should not be used in libraries, the
//!   "OS_STR_BYTES_CHECKED_CONVERSIONS" environment variable must be defined
//!   during compilation.
//!
//! - **print\_bytes** -
//!   Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and
//!   [`RawOsString`].
//!
//! - **uniquote** -
//!   Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and
//!   [`RawOsString`].
104 //!
105 //! # Implementation
106 //!
107 //! Some methods return [`Cow`] to account for platform differences. However,
108 //! no guarantee is made that the same variant of that enum will always be
109 //! returned for the same platform. Whichever can be constructed most
110 //! efficiently will be returned.
111 //!
112 //! All traits are [sealed], meaning that they can only be implemented by this
113 //! crate. Otherwise, backward compatibility would be more difficult to
114 //! maintain for new features.
115 //!
116 //! # Complexity
117 //!
118 //! Conversion method complexities will vary based on what functionality is
119 //! available for the platform. At worst, they will all be linear, but some can
120 //! take constant time. For example, [`RawOsString::into_os_string`] might be
121 //! able to reuse its allocation.
122 //!
123 //! # Examples
124 //!
125 //! ```
126 //! # use std::io;
127 //! #
128 //! # #[cfg(feature = "raw_os_str")]
129 //! # {
130 //! # #[cfg(any())]
131 //! use std::env;
132 //! use std::fs;
133 //!
134 //! use os_str_bytes::RawOsStr;
135 //!
136 //! # mod env {
137 //! # use std::env;
138 //! # use std::ffi::OsString;
139 //! #
140 //! # pub fn args_os() -> impl Iterator<Item = OsString> {
141 //! # let mut file = env::temp_dir();
142 //! # file.push("os_str_bytes\u{E9}.txt");
143 //! # return vec![OsString::new(), file.into_os_string()].into_iter();
144 //! # }
145 //! # }
146 //! #
//! for file in env::args_os().skip(1) {
//!     if !RawOsStr::new(&file).starts_with('-') {
//!         let string = "Hello, world!";
//!         fs::write(&file, string)?;
//!         assert_eq!(string, fs::read_to_string(file)?);
//!     }
//! }
154 //! # }
155 //! #
156 //! # Ok::<_, io::Error>(())
157 //! ```
158 //!
159 //! [bstr]: https://crates.io/crates/bstr
160 //! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str
161 //! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string
162 //! [memchr complexity]: RawOsStr#complexity
163 //! [memchr]: https://crates.io/crates/memchr
164 //! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
165 //! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
166 //! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed
167 //! [print\_bytes]: https://crates.io/crates/print_bytes
168
169 #![cfg_attr(not(feature = "checked_conversions"), allow(deprecated))]
170 // Only require a nightly compiler when building documentation for docs.rs.
171 // This is a private option that should not be used.
172 // https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407
173 // https://github.com/dylni/os_str_bytes/issues/2
174 #![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))]
175 // Nightly is also currently required for the SGX platform.
176 #![cfg_attr(
177 all(target_vendor = "fortanix", target_env = "sgx"),
178 feature(sgx_platform)
179 )]
180 #![warn(unsafe_op_in_unsafe_fn)]
181 #![warn(unused_results)]
182
183 use std::borrow::Cow;
184 use std::error::Error;
185 use std::ffi::OsStr;
186 use std::ffi::OsString;
187 use std::fmt;
188 use std::fmt::Display;
189 use std::fmt::Formatter;
190 use std::path::Path;
191 use std::path::PathBuf;
192 use std::result;
193
// Emits every wrapped item behind the "checked_conversions" feature gate, so
// that the `cfg` attribute does not need to be repeated on each item.
macro_rules! if_checked_conversions {
    ( $($gated:item)+ ) => {
        $(
            #[cfg(feature = "checked_conversions")]
            $gated
        )+
    };
}
202
// Compile-time opt-in check: when the "checked_conversions" feature is
// enabled, `env!` aborts the build with the message below unless the
// environment variable is defined. The check is skipped when the
// `os_str_bytes_docs_rs` cfg is set.
#[cfg(not(os_str_bytes_docs_rs))]
if_checked_conversions! {
    const _: &str = env!(
        "OS_STR_BYTES_CHECKED_CONVERSIONS",
        "The 'OS_STR_BYTES_CHECKED_CONVERSIONS' environment variable must be \
         defined to use the 'checked_conversions' feature.",
    );
}
211
// Emits the given item unchanged, additionally marking it as deprecated (with
// the given note) whenever the "checked_conversions" feature is disabled.
#[rustfmt::skip]
macro_rules! deprecated_checked_conversion {
    ( $note:expr , $definition:item ) => {
        #[cfg_attr(
            not(feature = "checked_conversions"),
            deprecated = $note
        )]
        $definition
    };
}
222
// Unwraps the result of a conversion, panicking with a fixed message when the
// bytes could not be decoded.
macro_rules! expect_encoded {
    ( $conversion:expr ) => {
        $conversion.expect("invalid raw bytes")
    };
}
228
// Emits every wrapped item behind the "raw_os_str" feature gate, so that the
// `cfg` attribute does not need to be repeated on each item.
macro_rules! if_raw_str {
    ( $($gated:item)+ ) => {
        $(
            #[cfg(feature = "raw_os_str")]
            $gated
        )+
    };
}
237
// Platform-specific implementation module. The source file is selected by
// target: "wasm/mod.rs" for wasm targets with an unknown OS, "windows/mod.rs"
// on Windows, and "common/mod.rs" for every other target.
#[cfg_attr(
    all(target_family = "wasm", target_os = "unknown"),
    path = "wasm/mod.rs"
)]
#[cfg_attr(windows, path = "windows/mod.rs")]
#[cfg_attr(
    not(any(all(target_family = "wasm", target_os = "unknown"), windows)),
    path = "common/mod.rs"
)]
mod imp;
248
// Only compiled on Windows, or on wasm targets with an unknown OS when the
// "raw_os_str" feature is enabled.
#[cfg(any(
    all(
        feature = "raw_os_str",
        target_family = "wasm",
        target_os = "unknown",
    ),
    windows,
))]
mod util;
258
// Modules and re-exports that exist only when the "raw_os_str" feature is
// enabled.
if_raw_str! {
    pub mod iter;

    // Private module; only `Pattern` is re-exported.
    mod pattern;
    pub use pattern::Pattern;

    // Private module; its string types are re-exported at the crate root.
    mod raw_str;
    pub use raw_str::RawOsStr;
    pub use raw_str::RawOsStrCow;
    pub use raw_str::RawOsString;
}
270
271 deprecated_checked_conversion! {
272 "use `OsStrBytes::assert_from_raw_bytes` or \
273 `OsStringBytes::assert_from_raw_vec` instead, or enable the \
274 'checked_conversions' feature",
275 /// The error that occurs when a byte sequence is not representable in the
276 /// platform encoding.
277 ///
278 /// [`Result::unwrap`] should almost always be called on results containing
279 /// this error. It should be known whether or not byte sequences are
280 /// properly encoded for the platform, since [the module-level
281 /// documentation][encoding] discourages using encoded bytes in
282 /// interchange. Results are returned primarily to make panicking behavior
283 /// explicit.
284 ///
285 /// On Unix, this error is never returned, but [`OsStrExt`] or
286 /// [`OsStringExt`] should be used instead if that needs to be guaranteed.
287 ///
288 /// [encoding]: self#encoding
289 /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
290 /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
291 /// [`Result::unwrap`]: ::std::result::Result::unwrap
292 #[derive(Clone, Debug, Eq, PartialEq)]
293 #[cfg_attr(
294 os_str_bytes_docs_rs,
295 doc(cfg(feature = "checked_conversions"))
296 )]
297 pub struct EncodingError(imp::EncodingError);
298 }
299
300 impl Display for EncodingError {
301 #[inline]
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result302 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
303 self.0.fmt(f)
304 }
305 }
306
307 impl Error for EncodingError {}
308
309 type Result<T> = result::Result<T, EncodingError>;
310
from_raw_bytes<'a, S>( string: S, ) -> result::Result<Cow<'a, OsStr>, imp::EncodingError> where S: Into<Cow<'a, [u8]>>,311 fn from_raw_bytes<'a, S>(
312 string: S,
313 ) -> result::Result<Cow<'a, OsStr>, imp::EncodingError>
314 where
315 S: Into<Cow<'a, [u8]>>,
316 {
317 match string.into() {
318 Cow::Borrowed(string) => imp::os_str_from_bytes(string),
319 Cow::Owned(string) => imp::os_string_from_vec(string).map(Cow::Owned),
320 }
321 }
322
// Reinterprets a platform string as a path, keeping the `Cow` variant of the
// input: borrowed strings become borrowed paths and owned strings become
// owned paths.
fn cow_os_str_into_path(string: Cow<'_, OsStr>) -> Cow<'_, Path> {
    match string {
        Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)),
        Cow::Borrowed(borrowed) => Cow::Borrowed(Path::new(borrowed)),
    }
}
329
330 /// A platform agnostic variant of [`OsStrExt`].
331 ///
332 /// For more information, see [the module-level documentation][module].
333 ///
334 /// [module]: self
335 /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
336 pub trait OsStrBytes: private::Sealed + ToOwned {
337 /// Converts a byte string into an equivalent platform-native string.
338 ///
339 /// # Panics
340 ///
341 /// Panics if the string is not valid for the [unspecified encoding] used
342 /// by this crate.
343 ///
344 /// # Examples
345 ///
346 /// ```
347 /// use std::env;
348 /// use std::ffi::OsStr;
349 /// # use std::io;
350 ///
351 /// use os_str_bytes::OsStrBytes;
352 ///
353 /// let os_string = env::current_exe()?;
354 /// let os_bytes = os_string.to_raw_bytes();
355 /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes));
356 /// #
357 /// # Ok::<_, io::Error>(())
358 /// ```
359 ///
360 /// [unspecified encoding]: self#encoding
361 #[must_use = "method should not be used for validation"]
362 #[track_caller]
assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> where S: Into<Cow<'a, [u8]>>363 fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
364 where
365 S: Into<Cow<'a, [u8]>>;
366
367 deprecated_checked_conversion! {
368 "use `assert_from_raw_bytes` instead, or enable the \
369 'checked_conversions' feature",
370 /// Converts a byte string into an equivalent platform-native string.
371 ///
372 /// [`assert_from_raw_bytes`] should almost always be used instead. For
373 /// more information, see [`EncodingError`].
374 ///
375 /// # Errors
376 ///
377 /// See documentation for [`EncodingError`].
378 ///
379 /// # Examples
380 ///
381 /// ```
382 /// use std::env;
383 /// use std::ffi::OsStr;
384 /// # use std::io;
385 ///
386 /// use os_str_bytes::OsStrBytes;
387 ///
388 /// let os_string = env::current_exe()?;
389 /// let os_bytes = os_string.to_raw_bytes();
390 /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
391 /// #
392 /// # Ok::<_, io::Error>(())
393 /// ```
394 ///
395 /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
396 #[cfg_attr(
397 os_str_bytes_docs_rs,
398 doc(cfg(feature = "checked_conversions"))
399 )]
400 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
401 where
402 S: Into<Cow<'a, [u8]>>;
403 }
404
405 /// Converts a platform-native string into an equivalent byte string.
406 ///
407 /// The returned string will use an [unspecified encoding].
408 ///
409 /// # Examples
410 ///
411 /// ```
412 /// use std::ffi::OsStr;
413 ///
414 /// use os_str_bytes::OsStrBytes;
415 ///
416 /// let string = "foobar";
417 /// let os_string = OsStr::new(string);
418 /// assert_eq!(string.as_bytes(), &*os_string.to_raw_bytes());
419 /// ```
420 ///
421 /// [unspecified encoding]: self#encoding
422 #[must_use]
to_raw_bytes(&self) -> Cow<'_, [u8]>423 fn to_raw_bytes(&self) -> Cow<'_, [u8]>;
424 }
425
426 impl OsStrBytes for OsStr {
427 #[inline]
assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> where S: Into<Cow<'a, [u8]>>,428 fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
429 where
430 S: Into<Cow<'a, [u8]>>,
431 {
432 expect_encoded!(from_raw_bytes(string))
433 }
434
435 #[inline]
from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> where S: Into<Cow<'a, [u8]>>,436 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
437 where
438 S: Into<Cow<'a, [u8]>>,
439 {
440 from_raw_bytes(string).map_err(EncodingError)
441 }
442
443 #[inline]
to_raw_bytes(&self) -> Cow<'_, [u8]>444 fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
445 imp::os_str_to_bytes(self)
446 }
447 }
448
449 impl OsStrBytes for Path {
450 #[inline]
assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> where S: Into<Cow<'a, [u8]>>,451 fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
452 where
453 S: Into<Cow<'a, [u8]>>,
454 {
455 cow_os_str_into_path(OsStr::assert_from_raw_bytes(string))
456 }
457
458 #[inline]
from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> where S: Into<Cow<'a, [u8]>>,459 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
460 where
461 S: Into<Cow<'a, [u8]>>,
462 {
463 OsStr::from_raw_bytes(string).map(cow_os_str_into_path)
464 }
465
466 #[inline]
to_raw_bytes(&self) -> Cow<'_, [u8]>467 fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
468 self.as_os_str().to_raw_bytes()
469 }
470 }
471
472 /// A platform agnostic variant of [`OsStringExt`].
473 ///
474 /// For more information, see [the module-level documentation][module].
475 ///
476 /// [module]: self
477 /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
478 pub trait OsStringBytes: private::Sealed + Sized {
479 /// Converts a byte string into an equivalent platform-native string.
480 ///
481 /// # Panics
482 ///
483 /// Panics if the string is not valid for the [unspecified encoding] used
484 /// by this crate.
485 ///
486 /// # Examples
487 ///
488 /// ```
489 /// use std::env;
490 /// use std::ffi::OsString;
491 /// # use std::io;
492 ///
493 /// use os_str_bytes::OsStringBytes;
494 ///
495 /// let os_string = env::current_exe()?;
496 /// let os_bytes = os_string.clone().into_raw_vec();
497 /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes));
498 /// #
499 /// # Ok::<_, io::Error>(())
500 /// ```
501 ///
502 /// [unspecified encoding]: self#encoding
503 #[must_use = "method should not be used for validation"]
504 #[track_caller]
assert_from_raw_vec(string: Vec<u8>) -> Self505 fn assert_from_raw_vec(string: Vec<u8>) -> Self;
506
507 deprecated_checked_conversion! {
508 "use `assert_from_raw_vec` instead, or enable the \
509 'checked_conversions' feature",
510 /// Converts a byte string into an equivalent platform-native string.
511 ///
512 /// [`assert_from_raw_vec`] should almost always be used instead. For
513 /// more information, see [`EncodingError`].
514 ///
515 /// # Errors
516 ///
517 /// See documentation for [`EncodingError`].
518 ///
519 /// # Examples
520 ///
521 /// ```
522 /// use std::env;
523 /// use std::ffi::OsString;
524 /// # use std::io;
525 ///
526 /// use os_str_bytes::OsStringBytes;
527 ///
528 /// let os_string = env::current_exe()?;
529 /// let os_bytes = os_string.clone().into_raw_vec();
530 /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
531 /// #
532 /// # Ok::<_, io::Error>(())
533 /// ```
534 ///
535 /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
536 #[cfg_attr(
537 os_str_bytes_docs_rs,
538 doc(cfg(feature = "checked_conversions"))
539 )]
540 fn from_raw_vec(string: Vec<u8>) -> Result<Self>;
541 }
542
543 /// Converts a platform-native string into an equivalent byte string.
544 ///
545 /// The returned string will use an [unspecified encoding].
546 ///
547 /// # Examples
548 ///
549 /// ```
550 /// use std::ffi::OsString;
551 ///
552 /// use os_str_bytes::OsStringBytes;
553 ///
554 /// let string = "foobar".to_owned();
555 /// let os_string: OsString = string.clone().into();
556 /// assert_eq!(string.into_bytes(), os_string.into_raw_vec());
557 /// ```
558 ///
559 /// [unspecified encoding]: self#encoding
560 #[must_use]
into_raw_vec(self) -> Vec<u8>561 fn into_raw_vec(self) -> Vec<u8>;
562 }
563
564 impl OsStringBytes for OsString {
565 #[inline]
assert_from_raw_vec(string: Vec<u8>) -> Self566 fn assert_from_raw_vec(string: Vec<u8>) -> Self {
567 expect_encoded!(imp::os_string_from_vec(string))
568 }
569
570 #[inline]
from_raw_vec(string: Vec<u8>) -> Result<Self>571 fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
572 imp::os_string_from_vec(string).map_err(EncodingError)
573 }
574
575 #[inline]
into_raw_vec(self) -> Vec<u8>576 fn into_raw_vec(self) -> Vec<u8> {
577 imp::os_string_into_vec(self)
578 }
579 }
580
581 impl OsStringBytes for PathBuf {
582 #[inline]
assert_from_raw_vec(string: Vec<u8>) -> Self583 fn assert_from_raw_vec(string: Vec<u8>) -> Self {
584 OsString::assert_from_raw_vec(string).into()
585 }
586
587 #[inline]
from_raw_vec(string: Vec<u8>) -> Result<Self>588 fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
589 OsString::from_raw_vec(string).map(Into::into)
590 }
591
592 #[inline]
into_raw_vec(self) -> Vec<u8>593 fn into_raw_vec(self) -> Vec<u8> {
594 self.into_os_string().into_raw_vec()
595 }
596 }
597
598 mod private {
599 use std::ffi::OsStr;
600 use std::ffi::OsString;
601 use std::path::Path;
602 use std::path::PathBuf;
603
604 if_raw_str! {
605 use std::borrow::Cow;
606
607 use super::RawOsStr;
608 }
609
610 pub trait Sealed {}
611
612 impl Sealed for char {}
613 impl Sealed for OsStr {}
614 impl Sealed for OsString {}
615 impl Sealed for Path {}
616 impl Sealed for PathBuf {}
617 impl Sealed for &str {}
618 impl Sealed for &String {}
619
620 if_raw_str! {
621 impl Sealed for Cow<'_, RawOsStr> {}
622 }
623 }
624