• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 #[cfg(feature = "alloc")]
6 use alloc::boxed::Box;
7 use core::cmp::Ordering;
8 use core::fmt;
9 use core::ops::Deref;
10 
/// A byte slice that is expected to hold UTF-8 text, without that expectation being enforced.
///
/// Prefer this type over `str` when UTF-8 validation during deserialization is unnecessary.
/// For example, strings that only serve as map keys never have to be reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. Call [`Self::try_as_str()`] to obtain a `str`.
///
/// Compared with a plain `[u8]`, the main benefit is that this type serializes as a string
/// in human-readable formats such as JSON.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[repr(transparent)]
#[derive(Eq, PartialEq, Ord, PartialOrd)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);
48 
49 impl fmt::Debug for PotentialUtf8 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result50     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51         // Debug as a string if possible
52         match self.try_as_str() {
53             Ok(s) => fmt::Debug::fmt(s, f),
54             Err(_) => fmt::Debug::fmt(&self.0, f),
55         }
56     }
57 }
58 
59 impl PotentialUtf8 {
60     /// Create a [`PotentialUtf8`] from a byte slice.
61     #[inline]
from_bytes(other: &[u8]) -> &Self62     pub const fn from_bytes(other: &[u8]) -> &Self {
63         // Safety: PotentialUtf8 is transparent over [u8]
64         unsafe { core::mem::transmute(other) }
65     }
66 
67     /// Create a [`PotentialUtf8`] from a string slice.
68     #[inline]
from_str(s: &str) -> &Self69     pub const fn from_str(s: &str) -> &Self {
70         Self::from_bytes(s.as_bytes())
71     }
72 
73     /// Create a [`PotentialUtf8`] from boxed bytes.
74     #[inline]
75     #[cfg(feature = "alloc")]
from_boxed_bytes(other: Box<[u8]>) -> Box<Self>76     pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
77         // Safety: PotentialUtf8 is transparent over [u8]
78         unsafe { core::mem::transmute(other) }
79     }
80 
81     /// Create a [`PotentialUtf8`] from a boxed `str`.
82     #[inline]
83     #[cfg(feature = "alloc")]
from_boxed_str(other: Box<str>) -> Box<Self>84     pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
85         Self::from_boxed_bytes(other.into_boxed_bytes())
86     }
87 
88     /// Get the bytes from a [`PotentialUtf8].
89     #[inline]
as_bytes(&self) -> &[u8]90     pub const fn as_bytes(&self) -> &[u8] {
91         &self.0
92     }
93 
94     /// Attempt to convert a [`PotentialUtf8`] to a `str`.
95     ///
96     /// # Examples
97     ///
98     /// ```
99     /// use potential_utf::PotentialUtf8;
100     ///
101     /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
102     ///
103     /// let b = A.try_as_str().unwrap();
104     /// assert_eq!(b, "abc");
105     /// ```
106     // Note: this is const starting in 1.63
107     #[inline]
try_as_str(&self) -> Result<&str, core::str::Utf8Error>108     pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
109         core::str::from_utf8(&self.0)
110     }
111 }
112 
113 impl<'a> From<&'a str> for &'a PotentialUtf8 {
114     #[inline]
from(other: &'a str) -> Self115     fn from(other: &'a str) -> Self {
116         PotentialUtf8::from_str(other)
117     }
118 }
119 
120 impl PartialEq<str> for PotentialUtf8 {
eq(&self, other: &str) -> bool121     fn eq(&self, other: &str) -> bool {
122         self.eq(Self::from_str(other))
123     }
124 }
125 
126 impl PartialOrd<str> for PotentialUtf8 {
partial_cmp(&self, other: &str) -> Option<Ordering>127     fn partial_cmp(&self, other: &str) -> Option<Ordering> {
128         self.partial_cmp(Self::from_str(other))
129     }
130 }
131 
132 impl PartialEq<PotentialUtf8> for str {
eq(&self, other: &PotentialUtf8) -> bool133     fn eq(&self, other: &PotentialUtf8) -> bool {
134         PotentialUtf8::from_str(self).eq(other)
135     }
136 }
137 
138 impl PartialOrd<PotentialUtf8> for str {
partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering>139     fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
140         PotentialUtf8::from_str(self).partial_cmp(other)
141     }
142 }
143 
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    /// Converts an owned string into an owned [`PotentialUtf8`] by reusing its boxed bytes.
    #[inline]
    fn from(other: Box<str>) -> Self {
        let boxed_bytes = other.into_boxed_bytes();
        PotentialUtf8::from_boxed_bytes(boxed_bytes)
    }
}
151 
152 impl Deref for PotentialUtf8 {
153     type Target = [u8];
deref(&self) -> &Self::Target154     fn deref(&self) -> &Self::Target {
155         &self.0
156     }
157 }
158 
/// This impl requires enabling the optional `zerovec` Cargo feature
/// (together with `alloc`, since the owned form is a `Box`).
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    type Container = zerovec::VarZeroVec<'a, Self>;
    type Slice = zerovec::VarZeroSlice<Self>;
    type GetType = Self;
    type OwnedType = Box<Self>;
}
167 
// Safety (based on the safety checklist on the VarULE trait):
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
//  6. All other methods are defaulted
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    #[inline]
    fn validate_bytes(_bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
        // Every byte sequence is accepted, so there is nothing to reject.
        Ok(())
    }
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        // Reuses the safe inherent constructor; no validation is involved.
        Self::from_bytes(bytes)
    }
}
188 
/// This impl requires enabling the optional `serde` Cargo feature
///
/// The bytes are validated as UTF-8 before anything is written, for both
/// human-readable and binary serializers; invalid UTF-8 yields a custom
/// serializer error.
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialUtf8 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::Error;
        match self.try_as_str() {
            Err(_) => Err(S::Error::custom("invalid UTF-8 in PotentialUtf8")),
            // Human-readable formats get a string ...
            Ok(text) if serializer.is_human_readable() => serializer.serialize_str(text),
            // ... everything else gets the raw bytes.
            Ok(text) => serializer.serialize_bytes(text.as_bytes()),
        }
    }
}
207 
/// This impl requires enabling the optional `serde` Cargo feature
///
/// Human-readable formats are read as a boxed `str`, all other formats as
/// boxed bytes.
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            return <Box<[u8]> as serde::Deserialize<'de>>::deserialize(deserializer)
                .map(PotentialUtf8::from_boxed_bytes);
        }
        <Box<str> as serde::Deserialize<'de>>::deserialize(deserializer)
            .map(PotentialUtf8::from_boxed_str)
    }
}
224 
/// This impl requires enabling the optional `serde` Cargo feature
///
/// Borrows from the input: human-readable formats are read as a `&str`, all
/// other formats as a `&[u8]`.
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            return <&'a [u8] as serde::Deserialize<'de>>::deserialize(deserializer)
                .map(PotentialUtf8::from_bytes);
        }
        <&'a str as serde::Deserialize<'de>>::deserialize(deserializer)
            .map(PotentialUtf8::from_str)
    }
}
244 
/// A `u16` slice that is expected to be UTF-16 but, being a plain transparent
/// newtype over `[u16]`, does not enforce that invariant.
///
/// Equality and ordering are those of the underlying `[u16]`.
#[repr(transparent)]
#[derive(Eq, PartialEq, Ord, PartialOrd)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);
249 
250 impl fmt::Debug for PotentialUtf16 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result251     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252         // Debug as a string if possible
253         for c in char::decode_utf16(self.0.iter().copied()) {
254             match c {
255                 Ok(c) => write!(f, "{c}")?,
256                 Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?,
257             }
258         }
259         Ok(())
260     }
261 }
262 
263 impl PotentialUtf16 {
264     /// Create a [`PotentialUtf16`] from a u16 slice.
265     #[inline]
from_slice(other: &[u16]) -> &Self266     pub const fn from_slice(other: &[u16]) -> &Self {
267         // Safety: PotentialUtf16 is transparent over [u16]
268         unsafe { core::mem::transmute(other) }
269     }
270 }
271