1 use crate::{engine::Engine, DecodeError}; 2 use std::{cmp, fmt, io}; 3 4 // This should be large, but it has to fit on the stack. 5 pub(crate) const BUF_SIZE: usize = 1024; 6 7 // 4 bytes of base64 data encode 3 bytes of raw data (modulo padding). 8 const BASE64_CHUNK_SIZE: usize = 4; 9 const DECODED_CHUNK_SIZE: usize = 3; 10 11 /// A `Read` implementation that decodes base64 data read from an underlying reader. 12 /// 13 /// # Examples 14 /// 15 /// ``` 16 /// use std::io::Read; 17 /// use std::io::Cursor; 18 /// use base64::engine::general_purpose; 19 /// 20 /// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc. 21 /// let mut wrapped_reader = Cursor::new(b"YXNkZg=="); 22 /// let mut decoder = base64::read::DecoderReader::new( 23 /// &mut wrapped_reader, 24 /// &general_purpose::STANDARD); 25 /// 26 /// // handle errors as you normally would 27 /// let mut result = Vec::new(); 28 /// decoder.read_to_end(&mut result).unwrap(); 29 /// 30 /// assert_eq!(b"asdf", &result[..]); 31 /// 32 /// ``` 33 pub struct DecoderReader<'e, E: Engine, R: io::Read> { 34 engine: &'e E, 35 /// Where b64 data is read from 36 inner: R, 37 38 // Holds b64 data read from the delegate reader. 39 b64_buffer: [u8; BUF_SIZE], 40 // The start of the pending buffered data in b64_buffer. 41 b64_offset: usize, 42 // The amount of buffered b64 data. 43 b64_len: usize, 44 // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a 45 // decoded chunk in to, we have to be able to hang on to a few decoded bytes. 46 // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to 47 // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest 48 // into here, which seems like a lot of complexity for 1 extra byte of storage. 49 decoded_buffer: [u8; 3], 50 // index of start of decoded data 51 decoded_offset: usize, 52 // length of decoded data 53 decoded_len: usize, 54 // used to provide accurate offsets in errors 55 total_b64_decoded: usize, 56 } 57 58 impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result59 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 60 f.debug_struct("DecoderReader") 61 .field("b64_offset", &self.b64_offset) 62 .field("b64_len", &self.b64_len) 63 .field("decoded_buffer", &self.decoded_buffer) 64 .field("decoded_offset", &self.decoded_offset) 65 .field("decoded_len", &self.decoded_len) 66 .field("total_b64_decoded", &self.total_b64_decoded) 67 .finish() 68 } 69 } 70 71 impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { 72 /// Create a new decoder that will read from the provided reader `r`. new(reader: R, engine: &'e E) -> Self73 pub fn new(reader: R, engine: &'e E) -> Self { 74 DecoderReader { 75 engine, 76 inner: reader, 77 b64_buffer: [0; BUF_SIZE], 78 b64_offset: 0, 79 b64_len: 0, 80 decoded_buffer: [0; DECODED_CHUNK_SIZE], 81 decoded_offset: 0, 82 decoded_len: 0, 83 total_b64_decoded: 0, 84 } 85 } 86 87 /// Write as much as possible of the decoded buffer into the target buffer. 88 /// Must only be called when there is something to write and space to write into. 89 /// Returns a Result with the number of (decoded) bytes copied. flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize>90 fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> { 91 debug_assert!(self.decoded_len > 0); 92 debug_assert!(!buf.is_empty()); 93 94 let copy_len = cmp::min(self.decoded_len, buf.len()); 95 debug_assert!(copy_len > 0); 96 debug_assert!(copy_len <= self.decoded_len); 97 98 buf[..copy_len].copy_from_slice( 99 &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len], 100 ); 101 102 self.decoded_offset += copy_len; 103 self.decoded_len -= copy_len; 104 105 debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); 106 107 Ok(copy_len) 108 } 109 110 /// Read into the remaining space in the buffer after the current contents. 111 /// Must only be called when there is space to read into in the buffer. 112 /// Returns the number of bytes read. read_from_delegate(&mut self) -> io::Result<usize>113 fn read_from_delegate(&mut self) -> io::Result<usize> { 114 debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); 115 116 let read = self 117 .inner 118 .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; 119 self.b64_len += read; 120 121 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 122 123 Ok(read) 124 } 125 126 /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the 127 /// caller's responsibility to choose the number of b64 bytes to decode correctly. 128 /// 129 /// Returns a Result with the number of decoded bytes written to `buf`. decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize>130 fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> { 131 debug_assert!(self.b64_len >= num_bytes); 132 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 133 debug_assert!(!buf.is_empty()); 134 135 let decoded = self 136 .engine 137 .internal_decode( 138 &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes], 139 buf, 140 self.engine.internal_decoded_len_estimate(num_bytes), 141 ) 142 .map_err(|e| match e { 143 DecodeError::InvalidByte(offset, byte) => { 144 DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) 145 } 146 DecodeError::InvalidLength => DecodeError::InvalidLength, 147 DecodeError::InvalidLastSymbol(offset, byte) => { 148 DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) 149 } 150 DecodeError::InvalidPadding => DecodeError::InvalidPadding, 151 }) 152 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; 153 154 self.total_b64_decoded += num_bytes; 155 self.b64_offset += num_bytes; 156 self.b64_len -= num_bytes; 157 158 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 159 160 Ok(decoded) 161 } 162 163 /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded 164 /// input from. 165 /// 166 /// Because `DecoderReader` performs internal buffering, the state of the inner reader is 167 /// unspecified. This function is mainly provided because the inner reader type may provide 168 /// additional functionality beyond the `Read` implementation which may still be useful. into_inner(self) -> R169 pub fn into_inner(self) -> R { 170 self.inner 171 } 172 } 173 174 impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { 175 /// Decode input from the wrapped reader. 176 /// 177 /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes 178 /// written in `buf`. 179 /// 180 /// Where possible, this function buffers base64 to minimize the number of read() calls to the 181 /// delegate reader. 182 /// 183 /// # Errors 184 /// 185 /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid 186 /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. read(&mut self, buf: &mut [u8]) -> io::Result<usize>187 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 188 if buf.is_empty() { 189 return Ok(0); 190 } 191 192 // offset == BUF_SIZE when we copied it all last time 193 debug_assert!(self.b64_offset <= BUF_SIZE); 194 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 195 debug_assert!(if self.b64_offset == BUF_SIZE { 196 self.b64_len == 0 197 } else { 198 self.b64_len <= BUF_SIZE 199 }); 200 201 debug_assert!(if self.decoded_len == 0 { 202 // can be = when we were able to copy the complete chunk 203 self.decoded_offset <= DECODED_CHUNK_SIZE 204 } else { 205 self.decoded_offset < DECODED_CHUNK_SIZE 206 }); 207 208 // We shouldn't ever decode into here when we can't immediately write at least one byte into 209 // the provided buf, so the effective length should only be 3 momentarily between when we 210 // decode and when we copy into the target buffer. 211 debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); 212 debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); 213 214 if self.decoded_len > 0 { 215 // we have a few leftover decoded bytes; flush that rather than pull in more b64 216 self.flush_decoded_buf(buf) 217 } else { 218 let mut at_eof = false; 219 while self.b64_len < BASE64_CHUNK_SIZE { 220 // Work around lack of copy_within, which is only present in 1.37 221 // Copy any bytes we have to the start of the buffer. 222 // We know we have < 1 chunk, so we can use a tiny tmp buffer. 223 let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE]; 224 memmove_buf[..self.b64_len].copy_from_slice( 225 &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len], 226 ); 227 self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]); 228 self.b64_offset = 0; 229 230 // then fill in more data 231 let read = self.read_from_delegate()?; 232 if read == 0 { 233 // we never pass in an empty buf, so 0 => we've hit EOF 234 at_eof = true; 235 break; 236 } 237 } 238 239 if self.b64_len == 0 { 240 debug_assert!(at_eof); 241 // we must be at EOF, and we have no data left to decode 242 return Ok(0); 243 }; 244 245 debug_assert!(if at_eof { 246 // if we are at eof, we may not have a complete chunk 247 self.b64_len > 0 248 } else { 249 // otherwise, we must have at least one chunk 250 self.b64_len >= BASE64_CHUNK_SIZE 251 }); 252 253 debug_assert_eq!(0, self.decoded_len); 254 255 if buf.len() < DECODED_CHUNK_SIZE { 256 // caller requested an annoyingly short read 257 // have to write to a tmp buf first to avoid double mutable borrow 258 let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE]; 259 // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have 260 // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64 261 // tokens, not 1, since 1 token can't decode to 1 byte). 262 let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE); 263 264 let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?; 265 self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]); 266 267 self.decoded_offset = 0; 268 self.decoded_len = decoded; 269 270 // can be less than 3 on last block due to padding 271 debug_assert!(decoded <= 3); 272 273 self.flush_decoded_buf(buf) 274 } else { 275 let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE) 276 .checked_mul(BASE64_CHUNK_SIZE) 277 .expect("too many chunks"); 278 debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE); 279 280 let b64_bytes_available_to_decode = if at_eof { 281 self.b64_len 282 } else { 283 // only use complete chunks 284 self.b64_len - self.b64_len % 4 285 }; 286 287 let actual_decode_len = cmp::min( 288 b64_bytes_that_can_decode_into_buf, 289 b64_bytes_available_to_decode, 290 ); 291 self.decode_to_buf(actual_decode_len, buf) 292 } 293 } 294 } 295 } 296