/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"

#if !defined(OPENSSL_NO_ASM) &&                         \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

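/* PACK places a 16-bit value in the top 16 bits of a |size_t|, matching the
 * layout of the rem_4bit table below. REDUCE1BIT multiplies V by x in
 * GHASH's bit-reversed representation of GF(2^128): a one-bit right shift
 * that XORs in the reduction constant (0xe1 followed by zeros) whenever a
 * set bit falls off the low end. */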
#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

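/* gcm_init_4bit fills |Htable| so that Htable[i] is the product of H with
 * the 4-bit value i (in GHASH's bit-reversed representation): the
 * power-of-two entries 8, 4, 2 and 1 are H multiplied by successive powers
 * of x, and the remaining entries are XOR combinations of those. The 4-bit
 * routines below then multiply by H one nibble at a time via table
 * lookups. */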
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
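/* rem_4bit[r] is the pre-reduced contribution of a 4-bit value r shifted out
 * of the low end of the accumulator, packed into the top 16 bits of a
 * size_t so that a single XOR into Z.hi performs the modular reduction. */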
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

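/* gcm_gmult_4bit computes Xi = Xi * H. It walks the 16 bytes of Xi from the
 * end, and for each 4-bit nibble shifts the accumulator Z right by four bits
 * (folding the shifted-out bits back in via rem_4bit) and XORs in the
 * corresponding Htable entry. */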
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

/* Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details. Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as a reference
 * and a placeholder for possible future non-trivial optimizations. */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride" parameter intended to mitigate cache thrashing:
 * the idea is to hash the data while it is still in the L1 cache after the
 * encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of functions. */
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif

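/* CRYPTO_ghash_init writes H (byte-swapped into host order) to |out_key|,
 * fills |out_table| and selects the best gmult/ghash implementation the CPU
 * supports, falling back to the generic 4-bit code. |*out_is_avx| is set
 * only when the x86-64 AVX+MOVBE path is chosen. */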
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t *gcm_key) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  /* H is stored in host byte order */
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block, int is_aesni_encrypt) {
  OPENSSL_memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  uint8_t gcm_key[16];
  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
  (*block)(gcm_key, gcm_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
                    gcm_key);

  ctx->use_aesni_gcm_crypt = (is_avx && is_aesni_encrypt) ? 1 : 0;
}
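
/* A minimal usage sketch for this API; |aes_key| and |aes_block| are
 * placeholders for an initialized AES key schedule and its |block128_f|,
 * and error handling is elided:
 *
 *   GCM128_CONTEXT gcm;
 *   uint8_t tag[16];
 *   CRYPTO_gcm128_init(&gcm, aes_key, aes_block, 0);
 *   CRYPTO_gcm128_setiv(&gcm, aes_key, iv, iv_len);
 *   if (!CRYPTO_gcm128_aad(&gcm, aad, aad_len) ||
 *       !CRYPTO_gcm128_encrypt(&gcm, aes_key, plaintext, ciphertext, len)) {
 *     // length overflow
 *   }
 *   CRYPTO_gcm128_tag(&gcm, tag, sizeof(tag));
 */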

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
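    /* For IVs that are not 96 bits, Y0 = GHASH(IV padded with zeros to a
     * block boundary, followed by a block encoding the IV's length in
     * bits). */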
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

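  /* NIST SP 800-38D caps the AAD at 2^64 - 1 bits; the byte-count check
   * below enforces that bound. */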
  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  /* Process a whole number of blocks. */
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  /* Process the remainder. */
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

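  /* NIST SP 800-38D limits the plaintext to 2^39 - 256 bits, i.e. the
   * 2^36 - 32 byte bound checked below. */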
  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
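  /* Hash each chunk of ciphertext before decrypting it, so that in-place
   * operation (in == out) still hashes ciphertext rather than plaintext. */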
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    /* |aesni_gcm_encrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    /* |aesni_gcm_decrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

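/* The tag is EK0 ^ GHASH(AAD || ciphertext || len(AAD) || len(ciphertext)),
 * with the lengths in bits; EK0 is the block cipher applied to the initial
 * counter block Y0. */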
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) && /* check FXSR bit */
         (ia32cap[1] & (1 << 1));    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif