// Copyright 2016 Brian Smith.
// Portions Copyright (c) 2016, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{quic::Sample, Nonce};
use crate::{
    cpu,
    polyfill::{array_map::Map, ChunksFixed},
};

// The portable fallback implementation. It is compiled in when there is no
// assembly implementation for the target, and also in `test` builds so it
// can be smoke-tested even on targets that normally use the assembly code.
#[cfg(any(
    test,
    not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86",
        target_arch = "x86_64"
    ))
))]
mod fallback;

use core::ops::RangeFrom;

/// A ChaCha20 key, stored as the eight 32-bit words (decoded little-endian)
/// that the core function operates on.
pub struct Key {
    words: [u32; KEY_LEN / 4],
    cpu_features: cpu::Features,
}

impl Key {
    /// Constructs a `Key` from its 32-byte encoding by splitting it into
    /// 4-byte chunks and decoding each chunk as a little-endian `u32`.
    pub(super) fn new(value: [u8; KEY_LEN], cpu_features: cpu::Features) -> Self {
        let value: &[[u8; 4]; KEY_LEN / 4] = value.chunks_fixed();
        Self {
            words: value.array_map(u32::from_le_bytes),
            cpu_features,
        }
    }

    /// The CPU features that were detected when this key was constructed.
    pub(super) fn cpu_features(&self) -> cpu::Features {
        self.cpu_features
    }
}

impl Key {
    /// Encrypts (XORs with the keystream) `in_out` in place, starting at
    /// block `counter`.
    #[inline]
    pub fn encrypt_in_place(&self, counter: Counter, in_out: &mut [u8]) {
        // `src == 0..` means input and output overlap exactly, which is the
        // always-safe case for the assembly implementations.
        self.encrypt_less_safe(counter, in_out, 0..);
    }

    /// XORs one block of keystream, identified by `iv`, into `in_out`.
    #[inline]
    pub fn encrypt_iv_xor_in_place(&self, iv: Iv, in_out: &mut [u8; 32]) {
        // It is safe to use `into_counter_for_single_block_less_safe()`
        // because `in_out` is exactly one block long.
        debug_assert!(in_out.len() <= BLOCK_LEN);
        self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), in_out, 0..);
    }

    /// Generates the 5-byte QUIC header-protection mask for `sample`.
    #[inline]
    pub fn new_mask(&self, sample: Sample) -> [u8; 5] {
        let mut out: [u8; 5] = [0; 5];
        let iv = Iv::assume_unique_for_key(sample);

        // `out` fits within a single block, so the single-block counter
        // conversion below is valid.
        debug_assert!(out.len() <= BLOCK_LEN);
        self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), &mut out, 0..);

        out
    }

    /// Analogous to `slice::copy_within()`: encrypts `in_out[src]` and
    /// writes the result starting at the beginning of `in_out`.
    pub fn encrypt_within(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) {
        // XXX: The x86 and at least one branch of the ARM assembly language
        // code doesn't allow overlapping input and output unless they are
        // exactly overlapping. TODO: Figure out which branch of the ARM code
        // has this limitation and come up with a better solution.
        //
        // https://rt.openssl.org/Ticket/Display.html?id=4362
        if cfg!(any(target_arch = "arm", target_arch = "x86")) && src.start != 0 {
            let len = in_out.len() - src.start;
            // Shift the input down so input and output overlap exactly,
            // then encrypt in place.
            in_out.copy_within(src, 0);
            self.encrypt_in_place(counter, &mut in_out[..len]);
        } else {
            self.encrypt_less_safe(counter, in_out, src);
        }
    }

    /// This is "less safe" because it skips the important check that `encrypt_within` does.
    /// Only call this with `src` equal to `0..` or from `encrypt_within`.
    #[inline]
    fn encrypt_less_safe(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) {
        // On targets with assembly support, define a local shim with the
        // same name/shape as the fallback so the call below is uniform.
        #[cfg(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        ))]
        #[inline(always)]
        pub(super) fn ChaCha20_ctr32(
            key: &Key,
            counter: Counter,
            in_out: &mut [u8],
            src: RangeFrom<usize>,
        ) {
            // Panics (rather than wrapping) if `src.start` exceeds the
            // buffer length, so an out-of-range `src` can't reach the FFI.
            let in_out_len = in_out.len().checked_sub(src.start).unwrap();

            // There's no need to worry if `counter` is incremented because it is
            // owned here and we drop immediately after the call.
            prefixed_extern! {
                fn ChaCha20_ctr32(
                    out: *mut u8,
                    in_: *const u8,
                    in_len: crate::c::size_t,
                    key: &[u32; KEY_LEN / 4],
                    counter: &Counter,
                );
            }
            // NOTE(review): output starts at `in_out[0]` while input starts
            // at `in_out[src.start]`, i.e. the buffers overlap. The caller's
            // contract (`src == 0..`, or routed through `encrypt_within`)
            // restricts this to overlaps the assembly supports.
            unsafe {
                ChaCha20_ctr32(
                    in_out.as_mut_ptr(),
                    in_out[src].as_ptr(),
                    in_out_len,
                    key.words_less_safe(),
                    &counter,
                )
            }
        }

        // On all other targets, use the portable Rust implementation.
        #[cfg(not(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        use fallback::ChaCha20_ctr32;

        ChaCha20_ctr32(self, counter, in_out, src);
    }

    /// The raw key words. "Less safe" because the caller becomes responsible
    /// for not misusing the exposed key material.
    #[inline]
    pub(super) fn words_less_safe(&self) -> &[u32; KEY_LEN / 4] {
        &self.words
    }
}

/// Counter || Nonce, all native endian.
155 #[repr(transparent)] 156 pub struct Counter([u32; 4]); 157 158 impl Counter { zero(nonce: Nonce) -> Self159 pub fn zero(nonce: Nonce) -> Self { 160 Self::from_nonce_and_ctr(nonce, 0) 161 } 162 from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self163 fn from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self { 164 let nonce = nonce.as_ref().chunks_fixed(); 165 Self([ 166 ctr, 167 u32::from_le_bytes(nonce[0]), 168 u32::from_le_bytes(nonce[1]), 169 u32::from_le_bytes(nonce[2]), 170 ]) 171 } 172 increment(&mut self) -> Iv173 pub fn increment(&mut self) -> Iv { 174 let iv = Iv(self.0); 175 self.0[0] += 1; 176 iv 177 } 178 179 /// This is "less safe" because it hands off management of the counter to 180 /// the caller. 181 #[cfg(any( 182 test, 183 not(any( 184 target_arch = "aarch64", 185 target_arch = "arm", 186 target_arch = "x86", 187 target_arch = "x86_64" 188 )) 189 ))] into_words_less_safe(self) -> [u32; 4]190 fn into_words_less_safe(self) -> [u32; 4] { 191 self.0 192 } 193 } 194 195 /// The IV for a single block encryption. 196 /// 197 /// Intentionally not `Clone` to ensure each is used only once. 
pub struct Iv([u32; 4]);

impl Iv {
    /// Constructs an `Iv` from 16 bytes that the caller asserts are unique
    /// for the key in use.
    fn assume_unique_for_key(value: [u8; 16]) -> Self {
        let value: &[[u8; 4]; 4] = value.chunks_fixed();
        Self(value.array_map(u32::from_le_bytes))
    }

    /// Reinterprets this IV as a `Counter`. Only valid when at most one
    /// block of keystream will be generated from it.
    fn into_counter_for_single_block_less_safe(self) -> Counter {
        Counter(self.0)
    }
}

/// The length of a ChaCha20 key, in bytes.
pub const KEY_LEN: usize = 32;

/// The length of a ChaCha20 block, in bytes.
const BLOCK_LEN: usize = 64;

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{polyfill, test};
    use alloc::vec;
    use core::convert::TryInto;

    // Upper bounds for the (alignment, offset) combinations exercised below.
    const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259);
    // Use the full range only when debug assertions are disabled or the
    // `slow_tests` feature is enabled; otherwise test only (0, 0).
    const MAX_ALIGNMENT_AND_OFFSET_SUBSET: (usize, usize) =
        if cfg!(any(debug_assertions = "false", feature = "slow_tests")) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            (0, 0)
        };

    #[test]
    fn chacha20_test_default() {
        // Always use `MAX_OFFSET` if we have assembly code.
        let max_offset = if cfg!(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            MAX_ALIGNMENT_AND_OFFSET_SUBSET
        };
        chacha20_test(max_offset, Key::encrypt_within);
    }

    // Smoketest the fallback implementation.
    #[test]
    fn chacha20_test_fallback() {
        chacha20_test(MAX_ALIGNMENT_AND_OFFSET_SUBSET, fallback::ChaCha20_ctr32);
    }

    // Verifies the encryption is successful when done on overlapping buffers.
    //
    // On some branches of the 32-bit x86 and ARM assembly code the in-place
    // operation fails in some situations where the input/output buffers are
    // not exactly overlapping. Such failures are dependent not only on the
    // degree of overlapping but also the length of the data. `encrypt_within`
    // works around that.
    fn chacha20_test(
        max_alignment_and_offset: (usize, usize),
        f: impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Reuse a buffer to avoid slowing down the tests with allocations.
        let mut buf = vec![0u8; 1300];

        test::run(test_file!("chacha_tests.txt"), move |section, test_case| {
            assert_eq!(section, "");

            let key = test_case.consume_bytes("Key");
            let key: &[u8; KEY_LEN] = key.as_slice().try_into()?;
            let key = Key::new(*key, cpu::features());

            let ctr = test_case.consume_usize("Ctr");
            let nonce = test_case.consume_bytes("Nonce");
            let input = test_case.consume_bytes("Input");
            let output = test_case.consume_bytes("Output");

            // Run the test case over all prefixes of the input because the
            // behavior of ChaCha20 implementation changes dependent on the
            // length of the input.
            for len in 0..=input.len() {
                chacha20_test_case_inner(
                    &key,
                    &nonce,
                    ctr as u32,
                    &input[..len],
                    &output[..len],
                    &mut buf,
                    max_alignment_and_offset,
                    &f,
                );
            }

            Ok(())
        });
    }

    // Runs one test case at every (alignment, offset) combination up to the
    // given maximums: the input is copied into `buf` at `offset` (after
    // skipping `alignment` bytes) and the encrypted result is checked at the
    // start of the (re-sliced) buffer.
    fn chacha20_test_case_inner(
        key: &Key,
        nonce: &[u8],
        ctr: u32,
        input: &[u8],
        expected: &[u8],
        buf: &mut [u8],
        (max_alignment, max_offset): (usize, usize),
        f: &impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Arbitrary filler for the bytes preceding the input data.
        const ARBITRARY: u8 = 123;

        for alignment in 0..=max_alignment {
            polyfill::slice::fill(&mut buf[..alignment], ARBITRARY);
            let buf = &mut buf[alignment..];
            for offset in 0..=max_offset {
                let buf = &mut buf[..(offset + input.len())];
                polyfill::slice::fill(&mut buf[..offset], ARBITRARY);
                let src = offset..;
                // Place the plaintext at `offset`; `f` writes the result
                // starting at index 0 of `buf`.
                buf[src.clone()].copy_from_slice(input);

                let ctr = Counter::from_nonce_and_ctr(
                    Nonce::try_assume_unique_for_key(nonce).unwrap(),
                    ctr,
                );
                f(key, ctr, buf, src);
                assert_eq!(&buf[..input.len()], expected)
            }
        }
    }
}