// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::cpu;
use core::ops::BitXorAssign;

#[cfg(not(target_arch = "aarch64"))]
mod gcm_nohw;

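/// A GCM authentication key: the hash subkey `H`, expanded into the
/// precomputed table used by the platform-specific GHASH implementations.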
pub struct Key {
    h_table: HTable,
    cpu_features: cpu::Features,
}

impl Key {
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        let h: [u64; 2] = h_be.into();

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
            cpu_features,
        };
        let h_table = &mut key.h_table;

        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

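/// The state of one GHASH computation: the running authentication value
/// `Xi` together with a copy of the key's `HTable`.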
pub struct Context {
    inner: ContextInner,
    cpu_features: cpu::Features,
}

impl Context {
    pub(crate) fn new(key: &Key, aad: Aad<&[u8]>) -> Self {
        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                _unused: Block::zero(),
                Htable: key.h_table.clone(),
            },
            cpu_features: key.cpu_features,
        };

        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        ctx
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(target_arch = "x86_64")]
    #[inline]
    pub(super) fn inner(&mut self) -> &mut ContextInner {
        &mut self.inner
    }

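    /// Hashes `input` into `Xi`. `input` must be non-empty and a whole
    /// number of blocks.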
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr() as *const [u8; BLOCK_LEN];
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

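    /// Absorbs one block: XORs `a` into `Xi`, then multiplies `Xi` by `H`
    /// (the GHASH "gmult" step) using the selected implementation.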
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

    pub(super) fn pre_finish<F>(self, f: F) -> super::Tag
    where
        F: FnOnce(Block) -> super::Tag,
    {
        f(self.inner.Xi.0)
    }

    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx2(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }
}

// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
#[derive(Clone)]
#[repr(C, align(16))]
struct HTable {
    Htable: [u128; HTABLE_LEN],
}

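// Not Rust's primitive `u128`: a `#[repr(C)]` pair of 64-bit halves matching
// the layout the assembly code expects for each table entry.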
#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

const HTABLE_LEN: usize = 16;

#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 X86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
pub(super) struct ContextInner {
    Xi: Xi,
    _unused: Block,
    Htable: HTable,
}

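// The GHASH implementations this module can dispatch to;
// `detect_implementation` picks the first one available, in this order.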
enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    #[cfg(not(target_arch = "aarch64"))]
    Fallback,
}

#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    {
        if (cpu::intel::FXSR.available(cpu_features)
            && cpu::intel::PCLMULQDQ.available(cpu_features))
            || cpu::arm::PMULL.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(target_arch = "arm")]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        return Implementation::NEON;
    }

    #[cfg(not(target_arch = "aarch64"))]
    Implementation::Fallback
}

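// True when the CPU supports the MOVBE+AVX2 assembly path
// (`gcm_init_avx` / `gcm_ghash_avx`).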
#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}