1 //! Provides the [Engine] abstraction and out of the box implementations. 2 #[cfg(any(feature = "alloc", test))] 3 use crate::chunked_encoder; 4 use crate::{ 5 encode::{encode_with_padding, EncodeSliceError}, 6 encoded_len, DecodeError, DecodeSliceError, 7 }; 8 #[cfg(any(feature = "alloc", test))] 9 use alloc::vec::Vec; 10 11 #[cfg(any(feature = "alloc", test))] 12 use alloc::{string::String, vec}; 13 14 pub mod general_purpose; 15 16 #[cfg(test)] 17 mod naive; 18 19 #[cfg(test)] 20 mod tests; 21 22 pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig}; 23 24 /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this. 25 /// 26 /// Different implementations offer different characteristics. The library currently ships with 27 /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices 28 /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed. 29 /// 30 /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's 31 /// recommended to store the engine in a `const` so that references to it won't pose any lifetime 32 /// issues, and to avoid repeating the cost of engine setup. 33 /// 34 /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden. 35 // When adding an implementation of Engine, include them in the engine test suite: 36 // - add an implementation of [engine::tests::EngineWrapper] 37 // - add the implementation to the `all_engines` macro 38 // All tests run on all engines listed in the macro. 
39 pub trait Engine: Send + Sync { 40 /// The config type used by this engine 41 type Config: Config; 42 /// The decode estimate used by this engine 43 type DecodeEstimate: DecodeEstimate; 44 45 /// This is not meant to be called directly; it is only for `Engine` implementors. 46 /// See the other `encode*` functions on this trait. 47 /// 48 /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`. 49 /// 50 /// `output` will be long enough to hold the encoded data. 51 /// 52 /// Returns the number of bytes written. 53 /// 54 /// No padding should be written; that is handled separately. 55 /// 56 /// Must not write any bytes into the output slice other than the encoded data. 57 #[doc(hidden)] internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize58 fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize; 59 60 /// This is not meant to be called directly; it is only for `Engine` implementors. 61 /// 62 /// As an optimization to prevent the decoded length from being calculated twice, it is 63 /// sometimes helpful to have a conservative estimate of the decoded size before doing the 64 /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. 65 #[doc(hidden)] internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate66 fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; 67 68 /// This is not meant to be called directly; it is only for `Engine` implementors. 69 /// See the other `decode*` functions on this trait. 70 /// 71 /// Decode `input` base64 bytes into the `output` buffer. 72 /// 73 /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid 74 /// calculating it again (expensive on short inputs).` 75 /// 76 /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this 77 /// function must also handle the final possibly partial chunk. 
78 /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, 79 /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the 80 /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). 81 /// 82 /// Decoding must not write any bytes into the output slice other than the decoded data. 83 /// 84 /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as 85 /// errors unless the engine is configured otherwise. 86 #[doc(hidden)] internal_decode( &self, input: &[u8], output: &mut [u8], decode_estimate: Self::DecodeEstimate, ) -> Result<DecodeMetadata, DecodeSliceError>87 fn internal_decode( 88 &self, 89 input: &[u8], 90 output: &mut [u8], 91 decode_estimate: Self::DecodeEstimate, 92 ) -> Result<DecodeMetadata, DecodeSliceError>; 93 94 /// Returns the config for this engine. config(&self) -> &Self::Config95 fn config(&self) -> &Self::Config; 96 97 /// Encode arbitrary octets as base64 using the provided `Engine`. 98 /// Returns a `String`. 
99 /// 100 /// # Example 101 /// 102 /// ```rust 103 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; 104 /// 105 /// let b64 = general_purpose::STANDARD.encode(b"hello world~"); 106 /// println!("{}", b64); 107 /// 108 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 109 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); 110 /// 111 /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~"); 112 #[cfg(any(feature = "alloc", test))] 113 #[inline] encode<T: AsRef<[u8]>>(&self, input: T) -> String114 fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { 115 fn inner<E>(engine: &E, input_bytes: &[u8]) -> String 116 where 117 E: Engine + ?Sized, 118 { 119 let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) 120 .expect("integer overflow when calculating buffer size"); 121 122 let mut buf = vec![0; encoded_size]; 123 124 encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size); 125 126 String::from_utf8(buf).expect("Invalid UTF8") 127 } 128 129 inner(self, input.as_ref()) 130 } 131 132 /// Encode arbitrary octets as base64 into a supplied `String`. 133 /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough. 
134 /// 135 /// # Example 136 /// 137 /// ```rust 138 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; 139 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 140 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); 141 /// 142 /// fn main() { 143 /// let mut buf = String::new(); 144 /// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf); 145 /// println!("{}", buf); 146 /// 147 /// buf.clear(); 148 /// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf); 149 /// println!("{}", buf); 150 /// } 151 /// ``` 152 #[cfg(any(feature = "alloc", test))] 153 #[inline] encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String)154 fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { 155 fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String) 156 where 157 E: Engine + ?Sized, 158 { 159 let mut sink = chunked_encoder::StringSink::new(output_buf); 160 161 chunked_encoder::ChunkedEncoder::new(engine) 162 .encode(input_bytes, &mut sink) 163 .expect("Writing to a String shouldn't fail"); 164 } 165 166 inner(self, input.as_ref(), output_buf) 167 } 168 169 /// Encode arbitrary octets as base64 into a supplied slice. 170 /// Writes into the supplied output buffer. 171 /// 172 /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident 173 /// or statically-allocated buffer). 
174 /// 175 /// # Example 176 /// 177 #[cfg_attr(feature = "alloc", doc = "```")] 178 #[cfg_attr(not(feature = "alloc"), doc = "```ignore")] 179 /// use base64::{Engine as _, engine::general_purpose}; 180 /// let s = b"hello internet!"; 181 /// let mut buf = Vec::new(); 182 /// // make sure we'll have a slice big enough for base64 + padding 183 /// buf.resize(s.len() * 4 / 3 + 4, 0); 184 /// 185 /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap(); 186 /// 187 /// // shorten our vec down to just what was written 188 /// buf.truncate(bytes_written); 189 /// 190 /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); 191 /// ``` 192 #[inline] encode_slice<T: AsRef<[u8]>>( &self, input: T, output_buf: &mut [u8], ) -> Result<usize, EncodeSliceError>193 fn encode_slice<T: AsRef<[u8]>>( 194 &self, 195 input: T, 196 output_buf: &mut [u8], 197 ) -> Result<usize, EncodeSliceError> { 198 fn inner<E>( 199 engine: &E, 200 input_bytes: &[u8], 201 output_buf: &mut [u8], 202 ) -> Result<usize, EncodeSliceError> 203 where 204 E: Engine + ?Sized, 205 { 206 let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) 207 .expect("usize overflow when calculating buffer size"); 208 209 if output_buf.len() < encoded_size { 210 return Err(EncodeSliceError::OutputSliceTooSmall); 211 } 212 213 let b64_output = &mut output_buf[0..encoded_size]; 214 215 encode_with_padding(input_bytes, b64_output, engine, encoded_size); 216 217 Ok(encoded_size) 218 } 219 220 inner(self, input.as_ref(), output_buf) 221 } 222 223 /// Decode the input into a new `Vec`. 
224 /// 225 /// # Example 226 /// 227 /// ```rust 228 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; 229 /// 230 /// let bytes = general_purpose::STANDARD 231 /// .decode("aGVsbG8gd29ybGR+Cg==").unwrap(); 232 /// println!("{:?}", bytes); 233 /// 234 /// // custom engine setup 235 /// let bytes_url = engine::GeneralPurpose::new( 236 /// &alphabet::URL_SAFE, 237 /// general_purpose::NO_PAD) 238 /// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap(); 239 /// println!("{:?}", bytes_url); 240 /// ``` 241 #[cfg(any(feature = "alloc", test))] 242 #[inline] decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError>243 fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { 244 fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError> 245 where 246 E: Engine + ?Sized, 247 { 248 let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); 249 let mut buffer = vec![0; estimate.decoded_len_estimate()]; 250 251 let bytes_written = engine 252 .internal_decode(input_bytes, &mut buffer, estimate) 253 .map_err(|e| match e { 254 DecodeSliceError::DecodeError(e) => e, 255 DecodeSliceError::OutputSliceTooSmall => { 256 unreachable!("Vec is sized conservatively") 257 } 258 })? 259 .decoded_len; 260 261 buffer.truncate(bytes_written); 262 263 Ok(buffer) 264 } 265 266 inner(self, input.as_ref()) 267 } 268 269 /// Decode the `input` into the supplied `buffer`. 270 /// 271 /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. 272 /// Returns a `Result` containing an empty tuple, aka `()`. 
273 /// 274 /// # Example 275 /// 276 /// ```rust 277 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; 278 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 279 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD); 280 /// 281 /// fn main() { 282 /// use base64::Engine; 283 /// let mut buffer = Vec::<u8>::new(); 284 /// // with the default engine 285 /// general_purpose::STANDARD 286 /// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap(); 287 /// println!("{:?}", buffer); 288 /// 289 /// buffer.clear(); 290 /// 291 /// // with a custom engine 292 /// CUSTOM_ENGINE.decode_vec( 293 /// "aGVsbG8gaW50ZXJuZXR-Cg==", 294 /// &mut buffer, 295 /// ).unwrap(); 296 /// println!("{:?}", buffer); 297 /// } 298 /// ``` 299 #[cfg(any(feature = "alloc", test))] 300 #[inline] decode_vec<T: AsRef<[u8]>>( &self, input: T, buffer: &mut Vec<u8>, ) -> Result<(), DecodeError>301 fn decode_vec<T: AsRef<[u8]>>( 302 &self, 303 input: T, 304 buffer: &mut Vec<u8>, 305 ) -> Result<(), DecodeError> { 306 fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError> 307 where 308 E: Engine + ?Sized, 309 { 310 let starting_output_len = buffer.len(); 311 let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); 312 313 let total_len_estimate = estimate 314 .decoded_len_estimate() 315 .checked_add(starting_output_len) 316 .expect("Overflow when calculating output buffer length"); 317 318 buffer.resize(total_len_estimate, 0); 319 320 let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; 321 322 let bytes_written = engine 323 .internal_decode(input_bytes, buffer_slice, estimate) 324 .map_err(|e| match e { 325 DecodeSliceError::DecodeError(e) => e, 326 DecodeSliceError::OutputSliceTooSmall => { 327 unreachable!("Vec is sized conservatively") 328 } 329 })? 
330 .decoded_len; 331 332 buffer.truncate(starting_output_len + bytes_written); 333 334 Ok(()) 335 } 336 337 inner(self, input.as_ref(), buffer) 338 } 339 340 /// Decode the input into the provided output slice. 341 /// 342 /// Returns the number of bytes written to the slice, or an error if `output` is smaller than 343 /// the estimated decoded length. 344 /// 345 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 346 /// 347 /// See [crate::decoded_len_estimate] for calculating buffer sizes. 348 /// 349 /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error 350 /// if the output buffer is too small. 351 #[inline] decode_slice<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeSliceError>352 fn decode_slice<T: AsRef<[u8]>>( 353 &self, 354 input: T, 355 output: &mut [u8], 356 ) -> Result<usize, DecodeSliceError> { 357 fn inner<E>( 358 engine: &E, 359 input_bytes: &[u8], 360 output: &mut [u8], 361 ) -> Result<usize, DecodeSliceError> 362 where 363 E: Engine + ?Sized, 364 { 365 engine 366 .internal_decode( 367 input_bytes, 368 output, 369 engine.internal_decoded_len_estimate(input_bytes.len()), 370 ) 371 .map(|dm| dm.decoded_len) 372 } 373 374 inner(self, input.as_ref(), output) 375 } 376 377 /// Decode the input into the provided output slice. 378 /// 379 /// Returns the number of bytes written to the slice. 380 /// 381 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 382 /// 383 /// See [crate::decoded_len_estimate] for calculating buffer sizes. 384 /// 385 /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output 386 /// buffer is too small. 387 /// 388 /// # Panics 389 /// 390 /// Panics if the provided output buffer is too small for the decoded data. 
391 #[inline] decode_slice_unchecked<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeError>392 fn decode_slice_unchecked<T: AsRef<[u8]>>( 393 &self, 394 input: T, 395 output: &mut [u8], 396 ) -> Result<usize, DecodeError> { 397 fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> 398 where 399 E: Engine + ?Sized, 400 { 401 engine 402 .internal_decode( 403 input_bytes, 404 output, 405 engine.internal_decoded_len_estimate(input_bytes.len()), 406 ) 407 .map(|dm| dm.decoded_len) 408 .map_err(|e| match e { 409 DecodeSliceError::DecodeError(e) => e, 410 DecodeSliceError::OutputSliceTooSmall => { 411 panic!("Output slice is too small") 412 } 413 }) 414 } 415 416 inner(self, input.as_ref(), output) 417 } 418 } 419 420 /// The minimal level of configuration that engines must support. 421 pub trait Config { 422 /// Returns `true` if padding should be added after the encoded output. 423 /// 424 /// Padding is added outside the engine's encode() since the engine may be used 425 /// to encode only a chunk of the overall output, so it can't always know when 426 /// the output is "done" and would therefore need padding (if configured). 427 // It could be provided as a separate parameter when encoding, but that feels like 428 // leaking an implementation detail to the user, and it's hopefully more convenient 429 // to have to only pass one thing (the engine) to any part of the API. encode_padding(&self) -> bool430 fn encode_padding(&self) -> bool; 431 } 432 433 /// The decode estimate used by an engine implementation. Users do not need to interact with this; 434 /// it is only for engine implementors. 435 /// 436 /// Implementors may store relevant data here when constructing this to avoid having to calculate 437 /// them again during actual decoding. 
pub trait DecodeEstimate {
    /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
    /// for pre-allocating buffers, etc.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
    fn decoded_len_estimate(&self) -> usize;
}

/// Controls how pad bytes are handled when decoding.
///
/// Each [Engine] must support at least the behavior indicated by
/// [DecodePaddingMode::RequireCanonical], and may support other modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed.
    Indifferent,
    /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
    RequireNone,
}

/// Metadata about the result of a decode operation
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
    /// Number of decoded bytes output
    pub(crate) decoded_len: usize,
    /// Offset of the first padding byte in the input, if any
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata for a finished decode.
    ///
    /// `decoded_bytes` is stored as `decoded_len` and `padding_index` as `padding_offset`.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        Self {
            decoded_len: decoded_bytes,
            padding_offset: padding_index,
        }
    }
}