// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use core::cmp::Ordering;
use core::fmt;
use core::ops::Deref;

/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
///
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
///
/// The main advantage of this type over `[u8]` is that it serializes as a string in
/// human-readable formats like JSON.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);

/// Formats as a string literal when the bytes are valid UTF-8, and as a byte
/// slice otherwise.
impl fmt::Debug for PotentialUtf8 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string if possible
        match self.try_as_str() {
            Ok(s) => fmt::Debug::fmt(s, f),
            Err(_) => fmt::Debug::fmt(&self.0, f),
        }
    }
}

impl PotentialUtf8 {
    /// Create a [`PotentialUtf8`] from a byte slice.
    #[inline]
    pub const fn from_bytes(other: &[u8]) -> &Self {
        // SAFETY: PotentialUtf8 is `#[repr(transparent)]` over `[u8]`, so the two
        // reference types have the same layout.
        unsafe { core::mem::transmute::<&[u8], &Self>(other) }
    }

    /// Create a [`PotentialUtf8`] from a string slice.
    #[inline]
    pub const fn from_str(s: &str) -> &Self {
        Self::from_bytes(s.as_bytes())
    }

    /// Create a [`PotentialUtf8`] from boxed bytes.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
        // SAFETY: PotentialUtf8 is `#[repr(transparent)]` over `[u8]`, so the two
        // box types have the same layout.
        unsafe { core::mem::transmute::<Box<[u8]>, Box<Self>>(other) }
    }

    /// Create a [`PotentialUtf8`] from a boxed `str`.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
        Self::from_boxed_bytes(other.into_boxed_bytes())
    }

    /// Get the bytes from a [`PotentialUtf8`].
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
    ///
    /// # Errors
    ///
    /// Returns the [`core::str::Utf8Error`] produced by [`core::str::from_utf8`]
    /// if the bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialUtf8;
    ///
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
    ///
    /// let b = A.try_as_str().unwrap();
    /// assert_eq!(b, "abc");
    /// ```
    // Note: this is const starting in 1.63
    #[inline]
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
        core::str::from_utf8(&self.0)
    }
}

impl<'a> From<&'a str> for &'a PotentialUtf8 {
    #[inline]
    fn from(other: &'a str) -> Self {
        PotentialUtf8::from_str(other)
    }
}

/// Compares the raw bytes with the UTF-8 bytes of the `str`.
impl PartialEq<str> for PotentialUtf8 {
    fn eq(&self, other: &str) -> bool {
        self.eq(Self::from_str(other))
    }
}

/// Orders by comparing the raw bytes with the UTF-8 bytes of the `str`.
impl PartialOrd<str> for PotentialUtf8 {
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
        self.partial_cmp(Self::from_str(other))
    }
}

/// Compares the UTF-8 bytes of the `str` with the raw bytes.
impl PartialEq<PotentialUtf8> for str {
    fn eq(&self, other: &PotentialUtf8) -> bool {
        PotentialUtf8::from_str(self).eq(other)
    }
}

/// Orders by comparing the UTF-8 bytes of the `str` with the raw bytes.
impl PartialOrd<PotentialUtf8> for str {
    fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
        PotentialUtf8::from_str(self).partial_cmp(other)
    }
}

#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    #[inline]
    fn from(other: Box<str>) -> Self {
        PotentialUtf8::from_boxed_str(other)
    }
}

/// Gives access to the underlying `[u8]` slice API.
impl Deref for PotentialUtf8 {
    type Target = [u8];
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    type Container = zerovec::VarZeroVec<'a, PotentialUtf8>;
    type Slice = zerovec::VarZeroSlice<PotentialUtf8>;
    type GetType = PotentialUtf8;
    type OwnedType = Box<PotentialUtf8>;
}

// Safety (based on the safety checklist on the VarULE trait):
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
//  6. All other methods are defaulted
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    #[inline]
    fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
        // Every byte sequence is a valid PotentialUtf8.
        Ok(())
    }
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        PotentialUtf8::from_bytes(bytes)
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
///
/// Serialization fails with a custom error if the bytes are not valid UTF-8;
/// the check runs before the format is inspected, so it applies to both
/// human-readable and binary serializers.
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialUtf8 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::Error;
        let s = self
            .try_as_str()
            .map_err(|_| S::Error::custom("invalid UTF-8 in PotentialUtf8"))?;
        if serializer.is_human_readable() {
            serializer.serialize_str(s)
        } else {
            serializer.serialize_bytes(s.as_bytes())
        }
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
///
/// Human-readable formats are read as a boxed `str`; other formats are read
/// as boxed bytes without UTF-8 validation in this impl.
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let boxed_str = Box::<str>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_boxed_str(boxed_str))
        } else {
            let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_boxed_bytes(boxed_bytes))
        }
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
///
/// Human-readable formats are read as a borrowed `&str`; other formats are
/// read as borrowed bytes without UTF-8 validation in this impl.
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let s = <&str>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_str(s))
        } else {
            let bytes = <&[u8]>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_bytes(bytes))
        }
    }
}

/// A `u16` slice that is expected to be a UTF-16 string but does not enforce that invariant.
///
/// The wrapped slice is public; use [`PotentialUtf16::from_slice`] to view an
/// existing `[u16]` as a [`PotentialUtf16`] without copying.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);

/// Writes each successfully decoded character as-is (no surrounding quotes or
/// escaping) and each unpaired surrogate as `\0x` followed by its lowercase
/// hexadecimal value.
impl fmt::Debug for PotentialUtf16 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string if possible
        for c in char::decode_utf16(self.0.iter().copied()) {
            match c {
                Ok(c) => write!(f, "{c}")?,
                Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?,
            }
        }
        Ok(())
    }
}

impl PotentialUtf16 {
    /// Create a [`PotentialUtf16`] from a u16 slice.
    #[inline]
    pub const fn from_slice(other: &[u16]) -> &Self {
        // SAFETY: PotentialUtf16 is `#[repr(transparent)]` over `[u16]`, so the two
        // reference types have the same layout.
        unsafe { core::mem::transmute::<&[u16], &Self>(other) }
    }
}