1 //! Provides [Alphabet] and constants for alphabets commonly used in the wild. 2 3 use crate::PAD_BYTE; 4 use core::fmt; 5 #[cfg(any(feature = "std", test))] 6 use std::error; 7 8 const ALPHABET_SIZE: usize = 64; 9 10 /// An alphabet defines the 64 ASCII characters (symbols) used for base64. 11 /// 12 /// Common alphabets are provided as constants, and custom alphabets 13 /// can be made via `from_str` or the `TryFrom<str>` implementation. 14 /// 15 /// ``` 16 /// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); 17 /// 18 /// let engine = base64::engine::GeneralPurpose::new( 19 /// &custom, 20 /// base64::engine::general_purpose::PAD); 21 /// ``` 22 #[derive(Clone, Debug, Eq, PartialEq)] 23 pub struct Alphabet { 24 pub(crate) symbols: [u8; ALPHABET_SIZE], 25 } 26 27 impl Alphabet { 28 /// Performs no checks so that it can be const. 29 /// Used only for known-valid strings. from_str_unchecked(alphabet: &str) -> Self30 const fn from_str_unchecked(alphabet: &str) -> Self { 31 let mut symbols = [0_u8; ALPHABET_SIZE]; 32 let source_bytes = alphabet.as_bytes(); 33 34 // a way to copy that's allowed in const fn 35 let mut index = 0; 36 while index < ALPHABET_SIZE { 37 symbols[index] = source_bytes[index]; 38 index += 1; 39 } 40 41 Self { symbols } 42 } 43 44 /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. 45 /// 46 /// The `=` byte is not allowed as it is used for padding. new(alphabet: &str) -> Result<Self, ParseAlphabetError>47 pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { 48 let bytes = alphabet.as_bytes(); 49 if bytes.len() != ALPHABET_SIZE { 50 return Err(ParseAlphabetError::InvalidLength); 51 } 52 53 { 54 let mut index = 0; 55 while index < ALPHABET_SIZE { 56 let byte = bytes[index]; 57 58 // must be ascii printable. 127 (DEL) is commonly considered printable 59 // for some reason but clearly unsuitable for base64. 60 if !(byte >= 32_u8 && byte <= 126_u8) { 61 return Err(ParseAlphabetError::UnprintableByte(byte)); 62 } 63 // = is assumed to be padding, so cannot be used as a symbol 64 if byte == PAD_BYTE { 65 return Err(ParseAlphabetError::ReservedByte(byte)); 66 } 67 68 // Check for duplicates while staying within what const allows. 69 // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit 70 // microsecond range. 71 72 let mut probe_index = 0; 73 while probe_index < ALPHABET_SIZE { 74 if probe_index == index { 75 probe_index += 1; 76 continue; 77 } 78 79 let probe_byte = bytes[probe_index]; 80 81 if byte == probe_byte { 82 return Err(ParseAlphabetError::DuplicatedByte(byte)); 83 } 84 85 probe_index += 1; 86 } 87 88 index += 1; 89 } 90 } 91 92 Ok(Self::from_str_unchecked(alphabet)) 93 } 94 } 95 96 impl TryFrom<&str> for Alphabet { 97 type Error = ParseAlphabetError; 98 try_from(value: &str) -> Result<Self, Self::Error>99 fn try_from(value: &str) -> Result<Self, Self::Error> { 100 Self::new(value) 101 } 102 } 103 104 /// Possible errors when constructing an [Alphabet] from a `str`. 105 #[derive(Debug, Eq, PartialEq)] 106 pub enum ParseAlphabetError { 107 /// Alphabets must be 64 ASCII bytes 108 InvalidLength, 109 /// All bytes must be unique 110 DuplicatedByte(u8), 111 /// All bytes must be printable (in the range `[32, 126]`). 112 UnprintableByte(u8), 113 /// `=` cannot be used 114 ReservedByte(u8), 115 } 116 117 impl fmt::Display for ParseAlphabetError { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result118 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 119 match self { 120 Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), 121 Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b), 122 Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b), 123 Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b), 124 } 125 } 126 } 127 128 #[cfg(any(feature = "std", test))] 129 impl error::Error for ParseAlphabetError {} 130 131 /// The standard alphabet (uses `+` and `/`). 132 /// 133 /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). 134 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( 135 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", 136 ); 137 138 /// The URL safe alphabet (uses `-` and `_`). 139 /// 140 /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). 141 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( 142 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", 143 ); 144 145 /// The `crypt(3)` alphabet (uses `.` and `/` as the first two values). 146 /// 147 /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. 148 pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( 149 "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 150 ); 151 152 /// The bcrypt alphabet. 153 pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( 154 "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 155 ); 156 157 /// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`). 158 /// 159 /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) 160 pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( 161 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", 162 ); 163 164 /// The alphabet used in BinHex 4.0 files. 165 /// 166 /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) 167 pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( 168 "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr", 169 ); 170 171 #[cfg(test)] 172 mod tests { 173 use crate::alphabet::*; 174 use std::convert::TryFrom as _; 175 176 #[test] detects_duplicate_start()177 fn detects_duplicate_start() { 178 assert_eq!( 179 ParseAlphabetError::DuplicatedByte(b'A'), 180 Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") 181 .unwrap_err() 182 ); 183 } 184 185 #[test] detects_duplicate_end()186 fn detects_duplicate_end() { 187 assert_eq!( 188 ParseAlphabetError::DuplicatedByte(b'/'), 189 Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//") 190 .unwrap_err() 191 ); 192 } 193 194 #[test] detects_duplicate_middle()195 fn detects_duplicate_middle() { 196 assert_eq!( 197 ParseAlphabetError::DuplicatedByte(b'Z'), 198 Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/") 199 .unwrap_err() 200 ); 201 } 202 203 #[test] detects_length()204 fn detects_length() { 205 assert_eq!( 206 ParseAlphabetError::InvalidLength, 207 Alphabet::new( 208 "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/", 209 ) 210 .unwrap_err() 211 ); 212 } 213 214 #[test] detects_padding()215 fn detects_padding() { 216 assert_eq!( 217 ParseAlphabetError::ReservedByte(b'='), 218 Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=") 219 .unwrap_err() 220 ); 221 } 222 223 #[test] detects_unprintable()224 fn detects_unprintable() { 225 // form feed 226 assert_eq!( 227 ParseAlphabetError::UnprintableByte(0xc), 228 Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") 229 .unwrap_err() 230 ); 231 } 232 233 #[test] same_as_unchecked()234 fn same_as_unchecked() { 235 assert_eq!( 236 STANDARD, 237 Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") 238 .unwrap() 239 ); 240 } 241 } 242