• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//! Provides the [Engine] abstraction and out-of-the-box implementations.
2 #[cfg(any(feature = "alloc", test))]
3 use crate::chunked_encoder;
4 use crate::{
5     encode::{encode_with_padding, EncodeSliceError},
6     encoded_len, DecodeError, DecodeSliceError,
7 };
8 #[cfg(any(feature = "alloc", test))]
9 use alloc::vec::Vec;
10 
11 #[cfg(any(feature = "alloc", test))]
12 use alloc::{string::String, vec};
13 
14 pub mod general_purpose;
15 
16 #[cfg(test)]
17 mod naive;
18 
19 #[cfg(test)]
20 mod tests;
21 
22 pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23 
24 /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25 ///
26 /// Different implementations offer different characteristics. The library currently ships with
27 /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28 /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29 ///
30 /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31 /// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32 /// issues, and to avoid repeating the cost of engine setup.
33 ///
34 /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
35 // When adding an implementation of Engine, include them in the engine test suite:
36 // - add an implementation of [engine::tests::EngineWrapper]
37 // - add the implementation to the `all_engines` macro
38 // All tests run on all engines listed in the macro.
39 pub trait Engine: Send + Sync {
40     /// The config type used by this engine
41     type Config: Config;
42     /// The decode estimate used by this engine
43     type DecodeEstimate: DecodeEstimate;
44 
45     /// This is not meant to be called directly; it is only for `Engine` implementors.
46     /// See the other `encode*` functions on this trait.
47     ///
48     /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
49     ///
50     /// `output` will be long enough to hold the encoded data.
51     ///
52     /// Returns the number of bytes written.
53     ///
54     /// No padding should be written; that is handled separately.
55     ///
56     /// Must not write any bytes into the output slice other than the encoded data.
57     #[doc(hidden)]
internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize58     fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59 
60     /// This is not meant to be called directly; it is only for `Engine` implementors.
61     ///
62     /// As an optimization to prevent the decoded length from being calculated twice, it is
63     /// sometimes helpful to have a conservative estimate of the decoded size before doing the
64     /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
65     #[doc(hidden)]
internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate66     fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67 
68     /// This is not meant to be called directly; it is only for `Engine` implementors.
69     /// See the other `decode*` functions on this trait.
70     ///
71     /// Decode `input` base64 bytes into the `output` buffer.
72     ///
73     /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
74     /// calculating it again (expensive on short inputs).`
75     ///
76     /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
77     /// function must also handle the final possibly partial chunk.
78     /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
79     /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
80     /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
81     ///
82     /// Decoding must not write any bytes into the output slice other than the decoded data.
83     ///
84     /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
85     /// errors unless the engine is configured otherwise.
86     #[doc(hidden)]
internal_decode( &self, input: &[u8], output: &mut [u8], decode_estimate: Self::DecodeEstimate, ) -> Result<DecodeMetadata, DecodeSliceError>87     fn internal_decode(
88         &self,
89         input: &[u8],
90         output: &mut [u8],
91         decode_estimate: Self::DecodeEstimate,
92     ) -> Result<DecodeMetadata, DecodeSliceError>;
93 
94     /// Returns the config for this engine.
config(&self) -> &Self::Config95     fn config(&self) -> &Self::Config;
96 
97     /// Encode arbitrary octets as base64 using the provided `Engine`.
98     /// Returns a `String`.
99     ///
100     /// # Example
101     ///
102     /// ```rust
103     /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
104     ///
105     /// let b64 = general_purpose::STANDARD.encode(b"hello world~");
106     /// println!("{}", b64);
107     ///
108     /// const CUSTOM_ENGINE: engine::GeneralPurpose =
109     ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
110     ///
111     /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
112     #[cfg(any(feature = "alloc", test))]
113     #[inline]
encode<T: AsRef<[u8]>>(&self, input: T) -> String114     fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
115         fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
116         where
117             E: Engine + ?Sized,
118         {
119             let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
120                 .expect("integer overflow when calculating buffer size");
121 
122             let mut buf = vec![0; encoded_size];
123 
124             encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
125 
126             String::from_utf8(buf).expect("Invalid UTF8")
127         }
128 
129         inner(self, input.as_ref())
130     }
131 
132     /// Encode arbitrary octets as base64 into a supplied `String`.
133     /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
134     ///
135     /// # Example
136     ///
137     /// ```rust
138     /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
139     /// const CUSTOM_ENGINE: engine::GeneralPurpose =
140     ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
141     ///
142     /// fn main() {
143     ///     let mut buf = String::new();
144     ///     general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
145     ///     println!("{}", buf);
146     ///
147     ///     buf.clear();
148     ///     CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
149     ///     println!("{}", buf);
150     /// }
151     /// ```
152     #[cfg(any(feature = "alloc", test))]
153     #[inline]
encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String)154     fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
155         fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
156         where
157             E: Engine + ?Sized,
158         {
159             let mut sink = chunked_encoder::StringSink::new(output_buf);
160 
161             chunked_encoder::ChunkedEncoder::new(engine)
162                 .encode(input_bytes, &mut sink)
163                 .expect("Writing to a String shouldn't fail");
164         }
165 
166         inner(self, input.as_ref(), output_buf)
167     }
168 
169     /// Encode arbitrary octets as base64 into a supplied slice.
170     /// Writes into the supplied output buffer.
171     ///
172     /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
173     /// or statically-allocated buffer).
174     ///
175     /// # Example
176     ///
177     #[cfg_attr(feature = "alloc", doc = "```")]
178     #[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
179     /// use base64::{Engine as _, engine::general_purpose};
180     /// let s = b"hello internet!";
181     /// let mut buf = Vec::new();
182     /// // make sure we'll have a slice big enough for base64 + padding
183     /// buf.resize(s.len() * 4 / 3 + 4, 0);
184     ///
185     /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
186     ///
187     /// // shorten our vec down to just what was written
188     /// buf.truncate(bytes_written);
189     ///
190     /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
191     /// ```
192     #[inline]
encode_slice<T: AsRef<[u8]>>( &self, input: T, output_buf: &mut [u8], ) -> Result<usize, EncodeSliceError>193     fn encode_slice<T: AsRef<[u8]>>(
194         &self,
195         input: T,
196         output_buf: &mut [u8],
197     ) -> Result<usize, EncodeSliceError> {
198         fn inner<E>(
199             engine: &E,
200             input_bytes: &[u8],
201             output_buf: &mut [u8],
202         ) -> Result<usize, EncodeSliceError>
203         where
204             E: Engine + ?Sized,
205         {
206             let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
207                 .expect("usize overflow when calculating buffer size");
208 
209             if output_buf.len() < encoded_size {
210                 return Err(EncodeSliceError::OutputSliceTooSmall);
211             }
212 
213             let b64_output = &mut output_buf[0..encoded_size];
214 
215             encode_with_padding(input_bytes, b64_output, engine, encoded_size);
216 
217             Ok(encoded_size)
218         }
219 
220         inner(self, input.as_ref(), output_buf)
221     }
222 
223     /// Decode the input into a new `Vec`.
224     ///
225     /// # Example
226     ///
227     /// ```rust
228     /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
229     ///
230     /// let bytes = general_purpose::STANDARD
231     ///     .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
232     /// println!("{:?}", bytes);
233     ///
234     /// // custom engine setup
235     /// let bytes_url = engine::GeneralPurpose::new(
236     ///              &alphabet::URL_SAFE,
237     ///              general_purpose::NO_PAD)
238     ///     .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
239     /// println!("{:?}", bytes_url);
240     /// ```
241     #[cfg(any(feature = "alloc", test))]
242     #[inline]
decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError>243     fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
244         fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
245         where
246             E: Engine + ?Sized,
247         {
248             let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
249             let mut buffer = vec![0; estimate.decoded_len_estimate()];
250 
251             let bytes_written = engine
252                 .internal_decode(input_bytes, &mut buffer, estimate)
253                 .map_err(|e| match e {
254                     DecodeSliceError::DecodeError(e) => e,
255                     DecodeSliceError::OutputSliceTooSmall => {
256                         unreachable!("Vec is sized conservatively")
257                     }
258                 })?
259                 .decoded_len;
260 
261             buffer.truncate(bytes_written);
262 
263             Ok(buffer)
264         }
265 
266         inner(self, input.as_ref())
267     }
268 
269     /// Decode the `input` into the supplied `buffer`.
270     ///
271     /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
272     /// Returns a `Result` containing an empty tuple, aka `()`.
273     ///
274     /// # Example
275     ///
276     /// ```rust
277     /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
278     /// const CUSTOM_ENGINE: engine::GeneralPurpose =
279     ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
280     ///
281     /// fn main() {
282     ///     use base64::Engine;
283     ///     let mut buffer = Vec::<u8>::new();
284     ///     // with the default engine
285     ///     general_purpose::STANDARD
286     ///         .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
287     ///     println!("{:?}", buffer);
288     ///
289     ///     buffer.clear();
290     ///
291     ///     // with a custom engine
292     ///     CUSTOM_ENGINE.decode_vec(
293     ///         "aGVsbG8gaW50ZXJuZXR-Cg==",
294     ///         &mut buffer,
295     ///     ).unwrap();
296     ///     println!("{:?}", buffer);
297     /// }
298     /// ```
299     #[cfg(any(feature = "alloc", test))]
300     #[inline]
decode_vec<T: AsRef<[u8]>>( &self, input: T, buffer: &mut Vec<u8>, ) -> Result<(), DecodeError>301     fn decode_vec<T: AsRef<[u8]>>(
302         &self,
303         input: T,
304         buffer: &mut Vec<u8>,
305     ) -> Result<(), DecodeError> {
306         fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
307         where
308             E: Engine + ?Sized,
309         {
310             let starting_output_len = buffer.len();
311             let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
312 
313             let total_len_estimate = estimate
314                 .decoded_len_estimate()
315                 .checked_add(starting_output_len)
316                 .expect("Overflow when calculating output buffer length");
317 
318             buffer.resize(total_len_estimate, 0);
319 
320             let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
321 
322             let bytes_written = engine
323                 .internal_decode(input_bytes, buffer_slice, estimate)
324                 .map_err(|e| match e {
325                     DecodeSliceError::DecodeError(e) => e,
326                     DecodeSliceError::OutputSliceTooSmall => {
327                         unreachable!("Vec is sized conservatively")
328                     }
329                 })?
330                 .decoded_len;
331 
332             buffer.truncate(starting_output_len + bytes_written);
333 
334             Ok(())
335         }
336 
337         inner(self, input.as_ref(), buffer)
338     }
339 
340     /// Decode the input into the provided output slice.
341     ///
342     /// Returns the number of bytes written to the slice, or an error if `output` is smaller than
343     /// the estimated decoded length.
344     ///
345     /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
346     ///
347     /// See [crate::decoded_len_estimate] for calculating buffer sizes.
348     ///
349     /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
350     /// if the output buffer is too small.
351     #[inline]
decode_slice<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeSliceError>352     fn decode_slice<T: AsRef<[u8]>>(
353         &self,
354         input: T,
355         output: &mut [u8],
356     ) -> Result<usize, DecodeSliceError> {
357         fn inner<E>(
358             engine: &E,
359             input_bytes: &[u8],
360             output: &mut [u8],
361         ) -> Result<usize, DecodeSliceError>
362         where
363             E: Engine + ?Sized,
364         {
365             engine
366                 .internal_decode(
367                     input_bytes,
368                     output,
369                     engine.internal_decoded_len_estimate(input_bytes.len()),
370                 )
371                 .map(|dm| dm.decoded_len)
372         }
373 
374         inner(self, input.as_ref(), output)
375     }
376 
377     /// Decode the input into the provided output slice.
378     ///
379     /// Returns the number of bytes written to the slice.
380     ///
381     /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
382     ///
383     /// See [crate::decoded_len_estimate] for calculating buffer sizes.
384     ///
385     /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
386     /// buffer is too small.
387     ///
388     /// # Panics
389     ///
390     /// Panics if the provided output buffer is too small for the decoded data.
391     #[inline]
decode_slice_unchecked<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeError>392     fn decode_slice_unchecked<T: AsRef<[u8]>>(
393         &self,
394         input: T,
395         output: &mut [u8],
396     ) -> Result<usize, DecodeError> {
397         fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
398         where
399             E: Engine + ?Sized,
400         {
401             engine
402                 .internal_decode(
403                     input_bytes,
404                     output,
405                     engine.internal_decoded_len_estimate(input_bytes.len()),
406                 )
407                 .map(|dm| dm.decoded_len)
408                 .map_err(|e| match e {
409                     DecodeSliceError::DecodeError(e) => e,
410                     DecodeSliceError::OutputSliceTooSmall => {
411                         panic!("Output slice is too small")
412                     }
413                 })
414         }
415 
416         inner(self, input.as_ref(), output)
417     }
418 }
419 
/// The smallest set of configuration every engine has to expose.
pub trait Config {
    /// Reports whether padding should be appended after the encoded output (`true` if so).
    ///
    /// The engine's own encode routine does not write padding: an engine may be asked to
    /// encode just one chunk of a larger output, so it cannot always tell when the output
    /// is finished and padding (if configured) is due.
    // Padding could instead be supplied as a separate argument at encode time, but that
    // would leak an implementation detail to the user; handing a single thing (the engine)
    // to every part of the API is hopefully more convenient.
    fn encode_padding(&self) -> bool;
}
432 
/// Decoded-length estimate produced by an engine implementation. This exists only for engine
/// implementors; users of the library never need to touch it.
///
/// An implementation may stash any data it computes while building the estimate so that the
/// same values need not be recomputed when the actual decode runs.
pub trait DecodeEstimate {
    /// Gives a conservative decoded length (erring on the side of too big), suitable for
    /// pre-allocating buffers and the like.
    ///
    /// The value must not exceed the next largest complete triple of decoded bytes; in other
    /// words, the final quad of tokens may be treated as complete and unpadded.
    fn decoded_len_estimate(&self) -> usize;
}
446 
/// Selects the treatment of pad bytes during decoding.
///
/// Every [Engine] is required to support at least
/// [DecodePaddingMode::RequireCanonical]; support for the other modes is optional.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Accepts canonical padding, and equally accepts fewer padding bytes than canonical.
    Indifferent,
    /// Demands canonical padding (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Demands that no padding is present -- predictable padding without any wasted bytes.
    RequireNone,
}
460 
/// Describes the outcome of a single decode operation.
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
    /// How many decoded bytes were written to the output
    pub(crate) decoded_len: usize,
    /// Position in the input of the first padding byte, or `None` when there was no padding
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the number of decoded bytes and the optional offset of the
    /// first padding byte in the input.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        DecodeMetadata {
            decoded_len,
            padding_offset,
        }
    }
}
478