1 //! Provides the [Engine] abstraction and out of the box implementations. 2 #[cfg(any(feature = "alloc", feature = "std", test))] 3 use crate::chunked_encoder; 4 use crate::{ 5 encode::{encode_with_padding, EncodeSliceError}, 6 encoded_len, DecodeError, DecodeSliceError, 7 }; 8 #[cfg(any(feature = "alloc", feature = "std", test))] 9 use alloc::vec::Vec; 10 11 #[cfg(any(feature = "alloc", feature = "std", test))] 12 use alloc::{string::String, vec}; 13 14 pub mod general_purpose; 15 16 #[cfg(test)] 17 mod naive; 18 19 #[cfg(test)] 20 mod tests; 21 22 pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig}; 23 24 /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this. 25 /// 26 /// Different implementations offer different characteristics. The library currently ships with 27 /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices 28 /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed. 29 /// 30 /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's 31 /// recommended to store the engine in a `const` so that references to it won't pose any lifetime 32 /// issues, and to avoid repeating the cost of engine setup. 33 /// 34 /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden. 35 // When adding an implementation of Engine, include them in the engine test suite: 36 // - add an implementation of [engine::tests::EngineWrapper] 37 // - add the implementation to the `all_engines` macro 38 // All tests run on all engines listed in the macro. 39 pub trait Engine: Send + Sync { 40 /// The config type used by this engine 41 type Config: Config; 42 /// The decode estimate used by this engine 43 type DecodeEstimate: DecodeEstimate; 44 45 /// This is not meant to be called directly; it is only for `Engine` implementors. 46 /// See the other `encode*` functions on this trait. 47 /// 48 /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`. 49 /// 50 /// `output` will be long enough to hold the encoded data. 51 /// 52 /// Returns the number of bytes written. 53 /// 54 /// No padding should be written; that is handled separately. 55 /// 56 /// Must not write any bytes into the output slice other than the encoded data. 57 #[doc(hidden)] internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize58 fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize; 59 60 /// This is not meant to be called directly; it is only for `Engine` implementors. 61 /// 62 /// As an optimization to prevent the decoded length from being calculated twice, it is 63 /// sometimes helpful to have a conservative estimate of the decoded size before doing the 64 /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. 65 /// 66 /// # Panics 67 /// 68 /// Panics if decoded length estimation overflows. 69 #[doc(hidden)] internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate70 fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; 71 72 /// This is not meant to be called directly; it is only for `Engine` implementors. 73 /// See the other `decode*` functions on this trait. 74 /// 75 /// Decode `input` base64 bytes into the `output` buffer. 76 /// 77 /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid 78 /// calculating it again (expensive on short inputs).` 79 /// 80 /// Returns the number of bytes written to `output`. 81 /// 82 /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this 83 /// function must also handle the final possibly partial chunk. 84 /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, 85 /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the 86 /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). 87 /// 88 /// Decoding must not write any bytes into the output slice other than the decoded data. 89 /// 90 /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as 91 /// errors unless the engine is configured otherwise. 92 /// 93 /// # Panics 94 /// 95 /// Panics if `output` is too small. 96 #[doc(hidden)] internal_decode( &self, input: &[u8], output: &mut [u8], decode_estimate: Self::DecodeEstimate, ) -> Result<usize, DecodeError>97 fn internal_decode( 98 &self, 99 input: &[u8], 100 output: &mut [u8], 101 decode_estimate: Self::DecodeEstimate, 102 ) -> Result<usize, DecodeError>; 103 104 /// Returns the config for this engine. config(&self) -> &Self::Config105 fn config(&self) -> &Self::Config; 106 107 /// Encode arbitrary octets as base64 using the provided `Engine`. 108 /// Returns a `String`. 109 /// 110 /// # Example 111 /// 112 /// ```rust 113 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; 114 /// 115 /// let b64 = general_purpose::STANDARD.encode(b"hello world~"); 116 /// println!("{}", b64); 117 /// 118 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 119 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); 120 /// 121 /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~"); 122 #[cfg(any(feature = "alloc", feature = "std", test))] encode<T: AsRef<[u8]>>(&self, input: T) -> String123 fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { 124 let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding()) 125 .expect("integer overflow when calculating buffer size"); 126 let mut buf = vec![0; encoded_size]; 127 128 encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size); 129 130 String::from_utf8(buf).expect("Invalid UTF8") 131 } 132 133 /// Encode arbitrary octets as base64 into a supplied `String`. 134 /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough. 135 /// 136 /// # Example 137 /// 138 /// ```rust 139 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; 140 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 141 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); 142 /// 143 /// fn main() { 144 /// let mut buf = String::new(); 145 /// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf); 146 /// println!("{}", buf); 147 /// 148 /// buf.clear(); 149 /// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf); 150 /// println!("{}", buf); 151 /// } 152 /// ``` 153 #[cfg(any(feature = "alloc", feature = "std", test))] encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String)154 fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { 155 let input_bytes = input.as_ref(); 156 157 { 158 let mut sink = chunked_encoder::StringSink::new(output_buf); 159 160 chunked_encoder::ChunkedEncoder::new(self) 161 .encode(input_bytes, &mut sink) 162 .expect("Writing to a String shouldn't fail"); 163 } 164 } 165 166 /// Encode arbitrary octets as base64 into a supplied slice. 167 /// Writes into the supplied output buffer. 168 /// 169 /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident 170 /// or statically-allocated buffer). 171 /// 172 /// # Example 173 /// 174 /// ```rust 175 /// use base64::{Engine as _, engine::general_purpose}; 176 /// let s = b"hello internet!"; 177 /// let mut buf = Vec::new(); 178 /// // make sure we'll have a slice big enough for base64 + padding 179 /// buf.resize(s.len() * 4 / 3 + 4, 0); 180 /// 181 /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap(); 182 /// 183 /// // shorten our vec down to just what was written 184 /// buf.truncate(bytes_written); 185 /// 186 /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); 187 /// ``` encode_slice<T: AsRef<[u8]>>( &self, input: T, output_buf: &mut [u8], ) -> Result<usize, EncodeSliceError>188 fn encode_slice<T: AsRef<[u8]>>( 189 &self, 190 input: T, 191 output_buf: &mut [u8], 192 ) -> Result<usize, EncodeSliceError> { 193 let input_bytes = input.as_ref(); 194 195 let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding()) 196 .expect("usize overflow when calculating buffer size"); 197 198 if output_buf.len() < encoded_size { 199 return Err(EncodeSliceError::OutputSliceTooSmall); 200 } 201 202 let b64_output = &mut output_buf[0..encoded_size]; 203 204 encode_with_padding(input_bytes, b64_output, self, encoded_size); 205 206 Ok(encoded_size) 207 } 208 209 /// Decode from string reference as octets using the specified [Engine]. 210 /// Returns a `Result` containing a `Vec<u8>`. 211 /// 212 /// # Example 213 /// 214 /// ```rust 215 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; 216 /// 217 /// let bytes = general_purpose::STANDARD 218 /// .decode("aGVsbG8gd29ybGR+Cg==").unwrap(); 219 /// println!("{:?}", bytes); 220 /// 221 /// // custom engine setup 222 /// let bytes_url = engine::GeneralPurpose::new( 223 /// &alphabet::URL_SAFE, 224 /// general_purpose::NO_PAD) 225 /// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap(); 226 /// println!("{:?}", bytes_url); 227 /// ``` 228 /// 229 /// # Panics 230 /// 231 /// Panics if decoded length estimation overflows. 232 /// This would happen for sizes within a few bytes of the maximum value of `usize`. 233 #[cfg(any(feature = "alloc", feature = "std", test))] decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError>234 fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { 235 let input_bytes = input.as_ref(); 236 237 let estimate = self.internal_decoded_len_estimate(input_bytes.len()); 238 let mut buffer = vec![0; estimate.decoded_len_estimate()]; 239 240 let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?; 241 buffer.truncate(bytes_written); 242 243 Ok(buffer) 244 } 245 246 /// Decode from string reference as octets. 247 /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. 248 /// Returns a `Result` containing an empty tuple, aka `()`. 249 /// 250 /// # Example 251 /// 252 /// ```rust 253 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; 254 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 255 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD); 256 /// 257 /// fn main() { 258 /// use base64::Engine; 259 /// let mut buffer = Vec::<u8>::new(); 260 /// // with the default engine 261 /// general_purpose::STANDARD 262 /// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap(); 263 /// println!("{:?}", buffer); 264 /// 265 /// buffer.clear(); 266 /// 267 /// // with a custom engine 268 /// CUSTOM_ENGINE.decode_vec( 269 /// "aGVsbG8gaW50ZXJuZXR-Cg==", 270 /// &mut buffer, 271 /// ).unwrap(); 272 /// println!("{:?}", buffer); 273 /// } 274 /// ``` 275 /// 276 /// # Panics 277 /// 278 /// Panics if decoded length estimation overflows. 279 /// This would happen for sizes within a few bytes of the maximum value of `usize`. 280 #[cfg(any(feature = "alloc", feature = "std", test))] decode_vec<T: AsRef<[u8]>>( &self, input: T, buffer: &mut Vec<u8>, ) -> Result<(), DecodeError>281 fn decode_vec<T: AsRef<[u8]>>( 282 &self, 283 input: T, 284 buffer: &mut Vec<u8>, 285 ) -> Result<(), DecodeError> { 286 let input_bytes = input.as_ref(); 287 288 let starting_output_len = buffer.len(); 289 290 let estimate = self.internal_decoded_len_estimate(input_bytes.len()); 291 let total_len_estimate = estimate 292 .decoded_len_estimate() 293 .checked_add(starting_output_len) 294 .expect("Overflow when calculating output buffer length"); 295 buffer.resize(total_len_estimate, 0); 296 297 let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; 298 let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?; 299 300 buffer.truncate(starting_output_len + bytes_written); 301 302 Ok(()) 303 } 304 305 /// Decode the input into the provided output slice. 306 /// 307 /// Returns an error if `output` is smaller than the estimated decoded length. 308 /// 309 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 310 /// 311 /// See [crate::decoded_len_estimate] for calculating buffer sizes. 312 /// 313 /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error 314 /// if the output buffer is too small. 315 /// 316 /// # Panics 317 /// 318 /// Panics if decoded length estimation overflows. 319 /// This would happen for sizes within a few bytes of the maximum value of `usize`. decode_slice<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeSliceError>320 fn decode_slice<T: AsRef<[u8]>>( 321 &self, 322 input: T, 323 output: &mut [u8], 324 ) -> Result<usize, DecodeSliceError> { 325 let input_bytes = input.as_ref(); 326 327 let estimate = self.internal_decoded_len_estimate(input_bytes.len()); 328 if output.len() < estimate.decoded_len_estimate() { 329 return Err(DecodeSliceError::OutputSliceTooSmall); 330 } 331 332 self.internal_decode(input_bytes, output, estimate) 333 .map_err(|e| e.into()) 334 } 335 336 /// Decode the input into the provided output slice. 337 /// 338 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 339 /// 340 /// See [crate::decoded_len_estimate] for calculating buffer sizes. 341 /// 342 /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output 343 /// buffer is too small. 344 /// 345 /// # Panics 346 /// 347 /// Panics if decoded length estimation overflows. 348 /// This would happen for sizes within a few bytes of the maximum value of `usize`. 349 /// 350 /// Panics if the provided output buffer is too small for the decoded data. decode_slice_unchecked<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeError>351 fn decode_slice_unchecked<T: AsRef<[u8]>>( 352 &self, 353 input: T, 354 output: &mut [u8], 355 ) -> Result<usize, DecodeError> { 356 let input_bytes = input.as_ref(); 357 358 self.internal_decode( 359 input_bytes, 360 output, 361 self.internal_decoded_len_estimate(input_bytes.len()), 362 ) 363 } 364 } 365 366 /// The minimal level of configuration that engines must support. 367 pub trait Config { 368 /// Returns `true` if padding should be added after the encoded output. 369 /// 370 /// Padding is added outside the engine's encode() since the engine may be used 371 /// to encode only a chunk of the overall output, so it can't always know when 372 /// the output is "done" and would therefore need padding (if configured). 373 // It could be provided as a separate parameter when encoding, but that feels like 374 // leaking an implementation detail to the user, and it's hopefully more convenient 375 // to have to only pass one thing (the engine) to any part of the API. encode_padding(&self) -> bool376 fn encode_padding(&self) -> bool; 377 } 378 379 /// The decode estimate used by an engine implementation. Users do not need to interact with this; 380 /// it is only for engine implementors. 381 /// 382 /// Implementors may store relevant data here when constructing this to avoid having to calculate 383 /// them again during actual decoding. 384 pub trait DecodeEstimate { 385 /// Returns a conservative (err on the side of too big) estimate of the decoded length to use 386 /// for pre-allocating buffers, etc. 387 /// 388 /// The estimate must be no larger than the next largest complete triple of decoded bytes. 389 /// That is, the final quad of tokens to decode may be assumed to be complete with no padding. 390 /// 391 /// # Panics 392 /// 393 /// Panics if decoded length estimation overflows. 394 /// This would happen for sizes within a few bytes of the maximum value of `usize`. decoded_len_estimate(&self) -> usize395 fn decoded_len_estimate(&self) -> usize; 396 } 397 398 /// Controls how pad bytes are handled when decoding. 399 /// 400 /// Each [Engine] must support at least the behavior indicated by 401 /// [DecodePaddingMode::RequireCanonical], and may support other modes. 402 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 403 pub enum DecodePaddingMode { 404 /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed. 405 Indifferent, 406 /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix). 407 RequireCanonical, 408 /// Padding must be absent -- for when you want predictable padding, without any wasted bytes. 409 RequireNone, 410 } 411