• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::Writeable;
6 use alloc::borrow::Cow;
7 use alloc::string::String;
8 use core::fmt;
9 
/// A borrowed byte slice together with a cursor recording how much of its
/// front has already been proven to be valid UTF-8.
struct PartiallyValidatedUtf8<'a> {
    // Safety Invariants:
    // 1. `offset <= slice.len()`.
    // 2. `slice[..offset]` is valid UTF-8.
    slice: &'a [u8],
    offset: usize,
}

impl<'a> PartiallyValidatedUtf8<'a> {
    /// Wrap `slice` with nothing validated yet.
    fn new(slice: &'a [u8]) -> Self {
        // Safety: Both field invariants hold trivially for an offset of zero:
        //   1. Zero is ≤ the length of every slice.
        //   2. The empty prefix `slice[..0]` is valid UTF-8.
        Self { offset: 0, slice }
    }

    /// Test whether the not-yet-validated bytes begin with `valid_str`.
    ///
    /// On a match the cursor moves past those bytes and `true` is returned.
    /// On a mismatch (including running past the end of the slice) nothing
    /// changes and `false` is returned.
    fn try_push(&mut self, valid_str: &str) -> bool {
        let end = self.offset + valid_str.len();
        match self.slice.get(self.offset..end) {
            Some(candidate) if candidate == valid_str.as_bytes() => {
                // Safety: Field invariants are preserved by this assignment:
                //   1. `get()` returned `Some` for a range ending at `end`, so `end` is
                //      ≤ the length of `self.slice`.
                //   2. The bytes before `self.offset` were already validated (invariant),
                //      and the bytes in `self.offset..end` are byte-for-byte equal to
                //      `valid_str`, a `&str`, so everything before `end` is valid UTF-8.
                self.offset = end;
                true
            }
            _ => false,
        }
    }

    /// Borrow the already-validated prefix as a `&str` with the lifetime of
    /// the underlying slice.
    fn validated_as_str(&self) -> &'a str {
        debug_assert!(self.offset <= self.slice.len());
        // Safety: field invariant 1 makes `self.offset` a valid range end for `self.slice`.
        let prefix = unsafe { self.slice.get_unchecked(..self.offset) };
        debug_assert!(core::str::from_utf8(prefix).is_ok());
        // Safety: field invariant 2 says the bytes before `self.offset` are valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(prefix) }
    }
}
55 
56 enum SliceOrString<'a> {
57     Slice(PartiallyValidatedUtf8<'a>),
58     String(String),
59 }
60 
61 /// This is an infallible impl. Functions always return Ok, not Err.
62 impl fmt::Write for SliceOrString<'_> {
63     #[inline]
write_str(&mut self, other: &str) -> fmt::Result64     fn write_str(&mut self, other: &str) -> fmt::Result {
65         match self {
66             SliceOrString::Slice(slice) => {
67                 if !slice.try_push(other) {
68                     // We failed to match. Convert to owned.
69                     let valid_str = slice.validated_as_str();
70                     let mut owned = String::with_capacity(valid_str.len() + other.len());
71                     owned.push_str(valid_str);
72                     owned.push_str(other);
73                     *self = SliceOrString::String(owned);
74                 }
75                 Ok(())
76             }
77             SliceOrString::String(owned) => owned.write_str(other),
78         }
79     }
80 }
81 
82 impl<'a> SliceOrString<'a> {
83     #[inline]
new(slice: &'a [u8]) -> Self84     fn new(slice: &'a [u8]) -> Self {
85         Self::Slice(PartiallyValidatedUtf8::new(slice))
86     }
87 
88     #[inline]
finish(self) -> Cow<'a, str>89     fn finish(self) -> Cow<'a, str> {
90         match self {
91             SliceOrString::Slice(slice) => Cow::Borrowed(slice.validated_as_str()),
92             SliceOrString::String(owned) => Cow::Owned(owned),
93         }
94     }
95 }
96 
97 /// Writes the contents of a `Writeable` to a string, returning a reference
98 /// to a slice if it matches the provided reference bytes, and allocating a
99 /// String otherwise.
100 ///
101 /// This function is useful if you have borrowed bytes which you expect
102 /// to be equal to a writeable a high percentage of the time.
103 ///
104 /// You can also use this function to make a more efficient implementation of
105 /// [`Writeable::write_to_string`].
106 ///
107 /// # Examples
108 ///
109 /// Basic usage and behavior:
110 ///
111 /// ```
112 /// use std::fmt;
113 /// use std::borrow::Cow;
114 /// use writeable::Writeable;
115 ///
116 /// struct WelcomeMessage<'s> {
117 ///     pub name: &'s str,
118 /// }
119 ///
120 /// impl<'s> Writeable for WelcomeMessage<'s> {
121 ///     // see impl in Writeable docs
122 /// #    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
123 /// #        sink.write_str("Hello, ")?;
124 /// #        sink.write_str(self.name)?;
125 /// #        sink.write_char('!')?;
126 /// #        Ok(())
127 /// #    }
128 /// }
129 ///
130 /// let message = WelcomeMessage { name: "Alice" };
131 ///
132 /// assert!(matches!(
133 ///     writeable::to_string_or_borrow(&message, b""),
134 ///     Cow::Owned(s) if s == "Hello, Alice!"
135 /// ));
136 /// assert!(matches!(
137 ///     writeable::to_string_or_borrow(&message, b"Hello"),
138 ///     Cow::Owned(s) if s == "Hello, Alice!"
139 /// ));
140 /// assert!(matches!(
141 ///     writeable::to_string_or_borrow(&message, b"Hello, Bob!"),
142 ///     Cow::Owned(s) if s == "Hello, Alice!"
143 /// ));
144 /// assert!(matches!(
145 ///     writeable::to_string_or_borrow(&message, b"Hello, Alice!"),
146 ///     Cow::Borrowed("Hello, Alice!")
147 /// ));
148 ///
149 /// // Borrowing can use a prefix:
150 /// assert!(matches!(
151 ///     writeable::to_string_or_borrow(&message, b"Hello, Alice!..\xFF\x00\xFF"),
152 ///     Cow::Borrowed("Hello, Alice!")
153 /// ));
154 /// ```
155 ///
156 /// Example use case: a function that transforms a string to lowercase.
157 /// We are also able to write a more efficient implementation of
158 /// [`Writeable::write_to_string`] in this situation.
159 ///
160 /// ```
161 /// use std::fmt;
162 /// use std::borrow::Cow;
163 /// use writeable::Writeable;
164 ///
165 /// struct MakeAsciiLower<'a>(&'a str);
166 ///
167 /// impl<'a> Writeable for MakeAsciiLower<'a> {
168 ///     fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
169 ///         for c in self.0.chars() {
170 ///             sink.write_char(c.to_ascii_lowercase())?;
171 ///         }
172 ///         Ok(())
173 ///     }
174 ///     #[inline]
175 ///     fn write_to_string(&self) -> Cow<str> {
176 ///         writeable::to_string_or_borrow(self, self.0.as_bytes())
177 ///     }
178 /// }
179 ///
180 /// fn make_lowercase(input: &str) -> Cow<str> {
181 ///     let writeable = MakeAsciiLower(input);
182 ///     writeable::to_string_or_borrow(&writeable, input.as_bytes())
183 /// }
184 ///
185 /// assert!(matches!(
186 ///     make_lowercase("this is lowercase"),
187 ///     Cow::Borrowed("this is lowercase")
188 /// ));
189 /// assert!(matches!(
190 ///     make_lowercase("this is UPPERCASE"),
191 ///     Cow::Owned(s) if s == "this is uppercase"
192 /// ));
193 ///
194 /// assert!(matches!(
195 ///     MakeAsciiLower("this is lowercase").write_to_string(),
196 ///     Cow::Borrowed("this is lowercase")
197 /// ));
198 /// assert!(matches!(
199 ///     MakeAsciiLower("this is UPPERCASE").write_to_string(),
200 ///     Cow::Owned(s) if s == "this is uppercase"
201 /// ));
202 /// ```
to_string_or_borrow<'a>( writeable: &impl Writeable, reference_bytes: &'a [u8], ) -> Cow<'a, str>203 pub fn to_string_or_borrow<'a>(
204     writeable: &impl Writeable,
205     reference_bytes: &'a [u8],
206 ) -> Cow<'a, str> {
207     let mut sink = SliceOrString::new(reference_bytes);
208     let _ = writeable.write_to(&mut sink);
209     sink.finish()
210 }
211