1 #[cfg(any(feature = "alloc", feature = "std", test))]
2 use alloc::string::String;
3 use core::cmp;
4 #[cfg(any(feature = "alloc", feature = "std", test))]
5 use core::str;
6
7 use crate::encode::add_padding;
8 use crate::engine::{Config, Engine};
9
/// Destination for the encoded output produced by `ChunkedEncoder`.
///
/// The encoder hands its output to the sink one chunk at a time instead of
/// returning a single buffer.
pub trait Sink {
    /// Error type returned when a chunk cannot be written.
    type Error;

    /// Consume one chunk of encoded base64 data (as UTF-8 bytes).
    ///
    /// # Errors
    ///
    /// Implementations return `Self::Error` when the chunk could not be handled.
    fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
17
18 const BUF_SIZE: usize = 1024;
19
20 /// A base64 encoder that emits encoded bytes in chunks without heap allocation.
21 pub struct ChunkedEncoder<'e, E: Engine + ?Sized> {
22 engine: &'e E,
23 max_input_chunk_len: usize,
24 }
25
26 impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> {
new(engine: &'e E) -> ChunkedEncoder<'e, E>27 pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> {
28 ChunkedEncoder {
29 engine,
30 max_input_chunk_len: max_input_length(BUF_SIZE, engine.config().encode_padding()),
31 }
32 }
33
encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error>34 pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
35 let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
36 let mut input_index = 0;
37
38 while input_index < bytes.len() {
39 // either the full input chunk size, or it's the last iteration
40 let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
41
42 let chunk = &bytes[input_index..(input_index + input_chunk_len)];
43
44 let mut b64_bytes_written = self.engine.internal_encode(chunk, &mut encode_buf);
45
46 input_index += input_chunk_len;
47 let more_input_left = input_index < bytes.len();
48
49 if self.engine.config().encode_padding() && !more_input_left {
50 // no more input, add padding if needed. Buffer will have room because
51 // max_input_length leaves room for it.
52 b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
53 }
54
55 sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
56 }
57
58 Ok(())
59 }
60 }
61
/// Compute the largest input length whose encoding fits in an output buffer of
/// `encoded_buf_len` bytes.
///
/// When `padded` is true, two bytes of the buffer are held back so the final chunk
/// of input still has room for its padding.
///
/// The result is always a multiple of 3, so no encoding state needs to be carried
/// from one chunk to the next.
///
/// # Panics
///
/// Panics if `padded` is true and `encoded_buf_len` is smaller than 2.
fn max_input_length(encoded_buf_len: usize, padded: bool) -> usize {
    // Bytes set aside for trailing padding, if any.
    let reserved_for_padding = if padded { 2 } else { 0 };

    let usable_len = encoded_buf_len
        .checked_sub(reserved_for_padding)
        .expect("Don't use a tiny buffer");

    // Every complete group of 4 output bytes corresponds to 3 input bytes.
    usable_len / 4 * 3
}
82
/// A really simple sink that just appends to a string.
///
/// The sink only borrows the target string, so the caller keeps ownership and
/// can read the accumulated output once the sink is no longer in use.
#[cfg(any(feature = "alloc", feature = "std", test))]
pub(crate) struct StringSink<'a> {
    /// String that receives the encoded output.
    string: &'a mut String,
}

#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> StringSink<'a> {
    /// Wrap `s` so that encoded chunks are appended to it.
    ///
    /// The borrow is tied explicitly to the sink's lifetime parameter rather than
    /// being hidden behind an elided lifetime in the return type.
    pub(crate) fn new(s: &'a mut String) -> Self {
        StringSink { string: s }
    }
}
95
#[cfg(any(feature = "alloc", feature = "std", test))]
impl Sink for StringSink<'_> {
    type Error = ();

    /// Append the chunk `s` to the wrapped string.
    ///
    /// # Panics
    ///
    /// Panics if `s` is not valid UTF-8.
    fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
        let text = str::from_utf8(s).unwrap();
        self.string.push_str(text);

        Ok(())
    }
}
106
#[cfg(test)]
pub mod tests {
    use rand::{
        distributions::{Distribution, Uniform},
        Rng, SeedableRng,
    };

    use crate::{
        alphabet::STANDARD,
        engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig, PAD},
        tests::random_engine,
    };

    use super::*;

    #[test]
    fn chunked_encode_empty() {
        let encoded = chunked_encode_str(&[], PAD);
        assert_eq!("", encoded);
    }

    #[test]
    fn chunked_encode_intermediate_fast_loop() {
        // > 8 bytes input, will enter the pretty fast loop
        let encoded = chunked_encode_str(b"foobarbazqux", PAD);
        assert_eq!("Zm9vYmFyYmF6cXV4", encoded);
    }

    #[test]
    fn chunked_encode_fast_loop() {
        // > 32 bytes input, will enter the uber fast loop
        let encoded = chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", PAD);
        assert_eq!(
            "Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==",
            encoded
        );
    }

    #[test]
    fn chunked_encode_slow_loop_only() {
        // < 8 bytes input, slow loop only
        let encoded = chunked_encode_str(b"foobar", PAD);
        assert_eq!("Zm9vYmFy", encoded);
    }

    #[test]
    fn chunked_encode_matches_normal_encode_random_string_sink() {
        chunked_encode_matches_normal_encode_random(&StringSinkTestHelper);
    }

    #[test]
    fn max_input_length_no_pad() {
        let len = max_input_length(1024, false);
        assert_eq!(768, len);
    }

    #[test]
    fn max_input_length_with_pad_decrements_one_triple() {
        let len = max_input_length(1024, true);
        assert_eq!(765, len);
    }

    #[test]
    fn max_input_length_with_pad_one_byte_short() {
        let len = max_input_length(1025, true);
        assert_eq!(765, len);
    }

    #[test]
    fn max_input_length_with_pad_fits_exactly() {
        let len = max_input_length(1026, true);
        assert_eq!(768, len);
    }

    #[test]
    fn max_input_length_cant_use_extra_single_encoded_byte() {
        let len = max_input_length(401, false);
        assert_eq!(300, len);
    }

    /// Check, over many random inputs and random engines, that chunked encoding
    /// through the sink provided by `sink_test_helper` produces the same string
    /// as the engine's own `encode_string`.
    pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
        let mut input: Vec<u8> = Vec::new();
        let mut expected = String::new();
        let mut rng = rand::rngs::SmallRng::from_entropy();
        let len_dist = Uniform::new(1, 10_000);

        for _ in 0..5_000 {
            input.clear();
            expected.clear();

            // Draw the length first, then the bytes, then the engine.
            let input_len = len_dist.sample(&mut rng);
            input.extend((0..input_len).map(|_| rng.gen::<u8>()));

            let engine = random_engine(&mut rng);

            let chunk_encoded = sink_test_helper.encode_to_string(&engine, &input);
            engine.encode_string(&input, &mut expected);

            assert_eq!(expected, chunk_encoded, "input len={}", input_len);
        }
    }

    /// Encode `bytes` with the standard alphabet and `config` through a
    /// `ChunkedEncoder` writing into a `StringSink`.
    fn chunked_encode_str(bytes: &[u8], config: GeneralPurposeConfig) -> String {
        let engine = GeneralPurpose::new(&STANDARD, config);
        let mut encoded = String::new();

        {
            let mut sink = StringSink::new(&mut encoded);
            ChunkedEncoder::new(&engine)
                .encode(bytes, &mut sink)
                .unwrap();
        }

        encoded
    }

    // An abstraction around sinks so that we can have tests that easily adapt to any sink
    // implementation
    pub trait SinkTestHelper {
        /// Encode `bytes` with `engine` through the sink under test and return the
        /// collected output.
        fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String;
    }

    struct StringSinkTestHelper;

    impl SinkTestHelper for StringSinkTestHelper {
        fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String {
            let mut encoded = String::new();

            {
                let mut sink = StringSink::new(&mut encoded);
                ChunkedEncoder::new(engine)
                    .encode(bytes, &mut sink)
                    .unwrap();
            }

            encoded
        }
    }
}
232