• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2 
3 use crate::PAD_BYTE;
4 use core::fmt;
5 #[cfg(any(feature = "std", test))]
6 use std::error;
7 
/// Number of symbols (bytes) every [`Alphabet`] must contain.
const ALPHABET_SIZE: usize = 64;
9 
10 /// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11 ///
12 /// Common alphabets are provided as constants, and custom alphabets
13 /// can be made via `from_str` or the `TryFrom<str>` implementation.
14 ///
15 /// ```
16 /// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
17 ///
18 /// let engine = base64::engine::GeneralPurpose::new(
19 ///     &custom,
20 ///     base64::engine::general_purpose::PAD);
21 /// ```
22 #[derive(Clone, Debug, Eq, PartialEq)]
23 pub struct Alphabet {
24     pub(crate) symbols: [u8; ALPHABET_SIZE],
25 }
26 
27 impl Alphabet {
28     /// Performs no checks so that it can be const.
29     /// Used only for known-valid strings.
from_str_unchecked(alphabet: &str) -> Self30     const fn from_str_unchecked(alphabet: &str) -> Self {
31         let mut symbols = [0_u8; ALPHABET_SIZE];
32         let source_bytes = alphabet.as_bytes();
33 
34         // a way to copy that's allowed in const fn
35         let mut index = 0;
36         while index < ALPHABET_SIZE {
37             symbols[index] = source_bytes[index];
38             index += 1;
39         }
40 
41         Self { symbols }
42     }
43 
44     /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
45     ///
46     /// The `=` byte is not allowed as it is used for padding.
new(alphabet: &str) -> Result<Self, ParseAlphabetError>47     pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
48         let bytes = alphabet.as_bytes();
49         if bytes.len() != ALPHABET_SIZE {
50             return Err(ParseAlphabetError::InvalidLength);
51         }
52 
53         {
54             let mut index = 0;
55             while index < ALPHABET_SIZE {
56                 let byte = bytes[index];
57 
58                 // must be ascii printable. 127 (DEL) is commonly considered printable
59                 // for some reason but clearly unsuitable for base64.
60                 if !(byte >= 32_u8 && byte <= 126_u8) {
61                     return Err(ParseAlphabetError::UnprintableByte(byte));
62                 }
63                 // = is assumed to be padding, so cannot be used as a symbol
64                 if byte == PAD_BYTE {
65                     return Err(ParseAlphabetError::ReservedByte(byte));
66                 }
67 
68                 // Check for duplicates while staying within what const allows.
69                 // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
70                 // microsecond range.
71 
72                 let mut probe_index = 0;
73                 while probe_index < ALPHABET_SIZE {
74                     if probe_index == index {
75                         probe_index += 1;
76                         continue;
77                     }
78 
79                     let probe_byte = bytes[probe_index];
80 
81                     if byte == probe_byte {
82                         return Err(ParseAlphabetError::DuplicatedByte(byte));
83                     }
84 
85                     probe_index += 1;
86                 }
87 
88                 index += 1;
89             }
90         }
91 
92         Ok(Self::from_str_unchecked(alphabet))
93     }
94 }
95 
96 impl TryFrom<&str> for Alphabet {
97     type Error = ParseAlphabetError;
98 
try_from(value: &str) -> Result<Self, Self::Error>99     fn try_from(value: &str) -> Result<Self, Self::Error> {
100         Self::new(value)
101     }
102 }
103 
/// Possible errors when constructing an [Alphabet] from a `str`.
///
/// Variants that carry a `u8` hold the offending byte.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be exactly 64 ASCII bytes long
    InvalidLength,
    /// All bytes must be unique; this byte occurred more than once
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`).
    UnprintableByte(u8),
    /// `=` cannot be used because it is reserved for padding
    ReservedByte(u8),
}
116 
117 impl fmt::Display for ParseAlphabetError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result118     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119         match self {
120             Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
121             Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
122             Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
123             Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
124         }
125     }
126 }
127 
// `std::error::Error` is only implemented when `std` is enabled (or when building tests),
// matching the cfg gate on the `std::error` import.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
130 
131 /// The standard alphabet (uses `+` and `/`).
132 ///
133 /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
134 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
135     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
136 );
137 
138 /// The URL safe alphabet (uses `-` and `_`).
139 ///
140 /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
141 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
142     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
143 );
144 
145 /// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
146 ///
147 /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
148 pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
149     "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
150 );
151 
152 /// The bcrypt alphabet.
153 pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
154     "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
155 );
156 
157 /// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
158 ///
159 /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
160 pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
161     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
162 );
163 
164 /// The alphabet used in BinHex 4.0 files.
165 ///
166 /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
167 pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
168     "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
169 );
170 
#[cfg(test)]
mod tests {
    //! Unit tests for [`Alphabet::new`] validation and its error reporting.

    use crate::alphabet::*;
    use std::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_non_adjacent() {
        // the first and last bytes collide, so the scan must look past immediate neighbours
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+A")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_length_too_short() {
        // 63 bytes: the standard alphabet without its final symbol
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_del_as_unprintable() {
        // DEL (127) sits just above the accepted `[32, 126]` range
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn displays_offending_byte_as_hex() {
        assert_eq!(
            "Duplicated byte: 0x41",
            ParseAlphabetError::DuplicatedByte(b'A').to_string()
        );
        assert_eq!(
            "Unprintable byte: 0x0c",
            ParseAlphabetError::UnprintableByte(0xc).to_string()
        );
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }
}
242