• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::{engine::Engine, DecodeError};
2 use std::{cmp, fmt, io};
3 
// Capacity of the base64 staging buffer. Larger is better for throughput, but the buffer is
// stored inline in `DecoderReader`, so it has to stay small enough to live on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// Base64 works in fixed-size groups: every 4 encoded bytes ...
const BASE64_CHUNK_SIZE: usize = 4;
// ... carry 3 bytes of raw data (fewer when the group is padded).
const DECODED_CHUNK_SIZE: usize = 3;
10 
11 /// A `Read` implementation that decodes base64 data read from an underlying reader.
12 ///
13 /// # Examples
14 ///
15 /// ```
16 /// use std::io::Read;
17 /// use std::io::Cursor;
18 /// use base64::engine::general_purpose;
19 ///
20 /// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
21 /// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
22 /// let mut decoder = base64::read::DecoderReader::new(
23 ///     &mut wrapped_reader,
24 ///     &general_purpose::STANDARD);
25 ///
26 /// // handle errors as you normally would
27 /// let mut result = Vec::new();
28 /// decoder.read_to_end(&mut result).unwrap();
29 ///
30 /// assert_eq!(b"asdf", &result[..]);
31 ///
32 /// ```
33 pub struct DecoderReader<'e, E: Engine, R: io::Read> {
34     engine: &'e E,
35     /// Where b64 data is read from
36     inner: R,
37 
38     // Holds b64 data read from the delegate reader.
39     b64_buffer: [u8; BUF_SIZE],
40     // The start of the pending buffered data in b64_buffer.
41     b64_offset: usize,
42     // The amount of buffered b64 data.
43     b64_len: usize,
44     // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
45     // decoded chunk in to, we have to be able to hang on to a few decoded bytes.
46     // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to
47     // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
48     // into here, which seems like a lot of complexity for 1 extra byte of storage.
49     decoded_buffer: [u8; 3],
50     // index of start of decoded data
51     decoded_offset: usize,
52     // length of decoded data
53     decoded_len: usize,
54     // used to provide accurate offsets in errors
55     total_b64_decoded: usize,
56 }
57 
58 impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result59     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
60         f.debug_struct("DecoderReader")
61             .field("b64_offset", &self.b64_offset)
62             .field("b64_len", &self.b64_len)
63             .field("decoded_buffer", &self.decoded_buffer)
64             .field("decoded_offset", &self.decoded_offset)
65             .field("decoded_len", &self.decoded_len)
66             .field("total_b64_decoded", &self.total_b64_decoded)
67             .finish()
68     }
69 }
70 
71 impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
72     /// Create a new decoder that will read from the provided reader `r`.
new(reader: R, engine: &'e E) -> Self73     pub fn new(reader: R, engine: &'e E) -> Self {
74         DecoderReader {
75             engine,
76             inner: reader,
77             b64_buffer: [0; BUF_SIZE],
78             b64_offset: 0,
79             b64_len: 0,
80             decoded_buffer: [0; DECODED_CHUNK_SIZE],
81             decoded_offset: 0,
82             decoded_len: 0,
83             total_b64_decoded: 0,
84         }
85     }
86 
87     /// Write as much as possible of the decoded buffer into the target buffer.
88     /// Must only be called when there is something to write and space to write into.
89     /// Returns a Result with the number of (decoded) bytes copied.
flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize>90     fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
91         debug_assert!(self.decoded_len > 0);
92         debug_assert!(!buf.is_empty());
93 
94         let copy_len = cmp::min(self.decoded_len, buf.len());
95         debug_assert!(copy_len > 0);
96         debug_assert!(copy_len <= self.decoded_len);
97 
98         buf[..copy_len].copy_from_slice(
99             &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
100         );
101 
102         self.decoded_offset += copy_len;
103         self.decoded_len -= copy_len;
104 
105         debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
106 
107         Ok(copy_len)
108     }
109 
110     /// Read into the remaining space in the buffer after the current contents.
111     /// Must only be called when there is space to read into in the buffer.
112     /// Returns the number of bytes read.
read_from_delegate(&mut self) -> io::Result<usize>113     fn read_from_delegate(&mut self) -> io::Result<usize> {
114         debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);
115 
116         let read = self
117             .inner
118             .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
119         self.b64_len += read;
120 
121         debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
122 
123         Ok(read)
124     }
125 
126     /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
127     /// caller's responsibility to choose the number of b64 bytes to decode correctly.
128     ///
129     /// Returns a Result with the number of decoded bytes written to `buf`.
decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize>130     fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
131         debug_assert!(self.b64_len >= num_bytes);
132         debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
133         debug_assert!(!buf.is_empty());
134 
135         let decoded = self
136             .engine
137             .internal_decode(
138                 &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
139                 buf,
140                 self.engine.internal_decoded_len_estimate(num_bytes),
141             )
142             .map_err(|e| match e {
143                 DecodeError::InvalidByte(offset, byte) => {
144                     DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
145                 }
146                 DecodeError::InvalidLength => DecodeError::InvalidLength,
147                 DecodeError::InvalidLastSymbol(offset, byte) => {
148                     DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
149                 }
150                 DecodeError::InvalidPadding => DecodeError::InvalidPadding,
151             })
152             .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
153 
154         self.total_b64_decoded += num_bytes;
155         self.b64_offset += num_bytes;
156         self.b64_len -= num_bytes;
157 
158         debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
159 
160         Ok(decoded)
161     }
162 
163     /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
164     /// input from.
165     ///
166     /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
167     /// unspecified. This function is mainly provided because the inner reader type may provide
168     /// additional functionality beyond the `Read` implementation which may still be useful.
into_inner(self) -> R169     pub fn into_inner(self) -> R {
170         self.inner
171     }
172 }
173 
174 impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
175     /// Decode input from the wrapped reader.
176     ///
177     /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
178     /// written in `buf`.
179     ///
180     /// Where possible, this function buffers base64 to minimize the number of read() calls to the
181     /// delegate reader.
182     ///
183     /// # Errors
184     ///
185     /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
186     /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
read(&mut self, buf: &mut [u8]) -> io::Result<usize>187     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
188         if buf.is_empty() {
189             return Ok(0);
190         }
191 
192         // offset == BUF_SIZE when we copied it all last time
193         debug_assert!(self.b64_offset <= BUF_SIZE);
194         debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
195         debug_assert!(if self.b64_offset == BUF_SIZE {
196             self.b64_len == 0
197         } else {
198             self.b64_len <= BUF_SIZE
199         });
200 
201         debug_assert!(if self.decoded_len == 0 {
202             // can be = when we were able to copy the complete chunk
203             self.decoded_offset <= DECODED_CHUNK_SIZE
204         } else {
205             self.decoded_offset < DECODED_CHUNK_SIZE
206         });
207 
208         // We shouldn't ever decode into here when we can't immediately write at least one byte into
209         // the provided buf, so the effective length should only be 3 momentarily between when we
210         // decode and when we copy into the target buffer.
211         debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
212         debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
213 
214         if self.decoded_len > 0 {
215             // we have a few leftover decoded bytes; flush that rather than pull in more b64
216             self.flush_decoded_buf(buf)
217         } else {
218             let mut at_eof = false;
219             while self.b64_len < BASE64_CHUNK_SIZE {
220                 // Work around lack of copy_within, which is only present in 1.37
221                 // Copy any bytes we have to the start of the buffer.
222                 // We know we have < 1 chunk, so we can use a tiny tmp buffer.
223                 let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
224                 memmove_buf[..self.b64_len].copy_from_slice(
225                     &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
226                 );
227                 self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
228                 self.b64_offset = 0;
229 
230                 // then fill in more data
231                 let read = self.read_from_delegate()?;
232                 if read == 0 {
233                     // we never pass in an empty buf, so 0 => we've hit EOF
234                     at_eof = true;
235                     break;
236                 }
237             }
238 
239             if self.b64_len == 0 {
240                 debug_assert!(at_eof);
241                 // we must be at EOF, and we have no data left to decode
242                 return Ok(0);
243             };
244 
245             debug_assert!(if at_eof {
246                 // if we are at eof, we may not have a complete chunk
247                 self.b64_len > 0
248             } else {
249                 // otherwise, we must have at least one chunk
250                 self.b64_len >= BASE64_CHUNK_SIZE
251             });
252 
253             debug_assert_eq!(0, self.decoded_len);
254 
255             if buf.len() < DECODED_CHUNK_SIZE {
256                 // caller requested an annoyingly short read
257                 // have to write to a tmp buf first to avoid double mutable borrow
258                 let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
259                 // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
260                 // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
261                 // tokens, not 1, since 1 token can't decode to 1 byte).
262                 let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);
263 
264                 let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
265                 self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);
266 
267                 self.decoded_offset = 0;
268                 self.decoded_len = decoded;
269 
270                 // can be less than 3 on last block due to padding
271                 debug_assert!(decoded <= 3);
272 
273                 self.flush_decoded_buf(buf)
274             } else {
275                 let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
276                     .checked_mul(BASE64_CHUNK_SIZE)
277                     .expect("too many chunks");
278                 debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
279 
280                 let b64_bytes_available_to_decode = if at_eof {
281                     self.b64_len
282                 } else {
283                     // only use complete chunks
284                     self.b64_len - self.b64_len % 4
285                 };
286 
287                 let actual_decode_len = cmp::min(
288                     b64_bytes_that_can_decode_into_buf,
289                     b64_bytes_available_to_decode,
290                 );
291                 self.decode_to_buf(actual_decode_len, buf)
292             }
293         }
294     }
295 }
296