// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use core::cmp::Ordering;
use core::fmt;
use core::ops::Deref;

/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
///
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
///
/// The main advantage of this type over `[u8]` is that it serializes as a string in
/// human-readable formats like JSON.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);

impl fmt::Debug for PotentialUtf8 {
    /// Formats as a quoted string when the bytes are valid UTF-8, and as a
    /// byte slice (e.g. `[255, 97]`) otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string if possible
        match self.try_as_str() {
            Ok(s) => fmt::Debug::fmt(s, f),
            Err(_) => fmt::Debug::fmt(&self.0, f),
        }
    }
}

impl PotentialUtf8 {
    /// Create a [`PotentialUtf8`] from a byte slice.
    ///
    /// The bytes are not validated.
    #[inline]
    pub const fn from_bytes(other: &[u8]) -> &Self {
        // Safety: PotentialUtf8 is transparent over [u8]
        unsafe { core::mem::transmute(other) }
    }

    /// Create a [`PotentialUtf8`] from a string slice.
    #[inline]
    pub const fn from_str(s: &str) -> &Self {
        Self::from_bytes(s.as_bytes())
    }

    /// Create a [`PotentialUtf8`] from boxed bytes.
    ///
    /// The bytes are not validated.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
        // Safety: PotentialUtf8 is transparent over [u8]
        unsafe { core::mem::transmute(other) }
    }

    /// Create a [`PotentialUtf8`] from a boxed `str`.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
        Self::from_boxed_bytes(other.into_boxed_bytes())
    }

    /// Get the bytes from a [`PotentialUtf8`].
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
    ///
    /// # Errors
    ///
    /// Returns the [`core::str::Utf8Error`] produced by [`core::str::from_utf8`]
    /// if the bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialUtf8;
    ///
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
    ///
    /// let b = A.try_as_str().unwrap();
    /// assert_eq!(b, "abc");
    /// ```
    // Note: `core::str::from_utf8` is const starting in Rust 1.63, so this
    // function could be made `const` once that is the minimum supported version.
    #[inline]
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
        core::str::from_utf8(&self.0)
    }
}

impl<'a> From<&'a str> for &'a PotentialUtf8 {
    #[inline]
    fn from(other: &'a str) -> Self {
        PotentialUtf8::from_str(other)
    }
}

impl PartialEq<str> for PotentialUtf8 {
    /// Compares the raw bytes against the UTF-8 bytes of `other`.
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.eq(Self::from_str(other))
    }
}

impl PartialOrd<str> for PotentialUtf8 {
    /// Orders the raw bytes against the UTF-8 bytes of `other`.
    #[inline]
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
        self.partial_cmp(Self::from_str(other))
    }
}

impl PartialEq<PotentialUtf8> for str {
    /// Compares the UTF-8 bytes of `self` against the raw bytes of `other`.
    #[inline]
    fn eq(&self, other: &PotentialUtf8) -> bool {
        PotentialUtf8::from_str(self).eq(other)
    }
}

impl PartialOrd<PotentialUtf8> for str {
    /// Orders the UTF-8 bytes of `self` against the raw bytes of `other`.
    #[inline]
    fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
        PotentialUtf8::from_str(self).partial_cmp(other)
    }
}

#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    #[inline]
    fn from(other: Box<str>) -> Self {
        PotentialUtf8::from_boxed_str(other)
    }
}

impl Deref for PotentialUtf8 {
    type Target = [u8];

    /// Exposes the underlying byte slice.
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    type Container = zerovec::VarZeroVec<'a, PotentialUtf8>;
    type Slice = zerovec::VarZeroSlice<PotentialUtf8>;
    type GetType = PotentialUtf8;
    type OwnedType = Box<PotentialUtf8>;
}

// Safety (based on the safety checklist on the VarULE trait):
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
//  6. All other methods are defaulted
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    #[inline]
    fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
        // Every byte sequence is accepted: the type does not enforce UTF-8.
        Ok(())
    }

    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        PotentialUtf8::from_bytes(bytes)
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialUtf8 {
    /// Serializes as a string for human-readable serializers and as bytes otherwise.
    ///
    /// # Errors
    ///
    /// Returns a custom serializer error if the bytes are not valid UTF-8.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::Error;
        // NOTE(review): validation runs before the format check, so the
        // non-human-readable path also rejects invalid UTF-8 even though the
        // matching `Deserialize` impls accept arbitrary bytes there. Confirm
        // this asymmetry is intended before relaxing it.
        let s = self
            .try_as_str()
            .map_err(|_| S::Error::custom("invalid UTF-8 in PotentialUtf8"))?;
        if serializer.is_human_readable() {
            serializer.serialize_str(s)
        } else {
            serializer.serialize_bytes(s.as_bytes())
        }
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> {
    /// Deserializes from a string for human-readable deserializers and from
    /// bytes otherwise.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let boxed_str = Box::<str>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_boxed_str(boxed_str))
        } else {
            let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_boxed_bytes(boxed_bytes))
        }
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    /// Deserializes by borrowing a `&str` for human-readable deserializers and
    /// a `&[u8]` otherwise.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let s = <&str>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_str(s))
        } else {
            let bytes = <&[u8]>::deserialize(deserializer)?;
            Ok(PotentialUtf8::from_bytes(bytes))
        }
    }
}

/// A `u16` slice that is expected to be a UTF-16 string but does not enforce that invariant.
///
/// The [`fmt::Debug`] impl decodes the code units as UTF-16 and escapes any
/// unpaired surrogate it encounters.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);

impl fmt::Debug for PotentialUtf16 {
    /// Writes each decoded character as-is (without surrounding quotes); an
    /// unpaired surrogate is written as `\0x` followed by its lowercase hex value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for c in char::decode_utf16(self.0.iter().copied()) {
            match c {
                Ok(c) => write!(f, "{c}")?,
                Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?,
            }
        }
        Ok(())
    }
}

impl PotentialUtf16 {
    /// Create a [`PotentialUtf16`] from a u16 slice.
    ///
    /// The code units are not validated.
    #[inline]
    pub const fn from_slice(other: &[u16]) -> &Self {
        // Safety: PotentialUtf16 is transparent over [u16]
        unsafe { core::mem::transmute(other) }
    }
}