/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/modes.h>

#include <assert.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"


#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
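/* REDUCE1BIT multiplies the 128-bit value V by x in GF(2^128). In GCM's
 * bit-reflected representation this is a right shift by one bit, with the
 * bit shifted out folded back in by XORing the constant 0xE1 (from the
 * reduction polynomial x^128 + x^7 + x^2 + x + 1) into the top byte. */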
#define REDUCE1BIT(V)                                                     \
  do {                                                                    \
    if (sizeof(size_t) == 8) {                                            \
      uint64_t T = OPENSSL_U64(0xe100000000000000) & (0 - (V.lo & 1));    \
      V.lo = (V.hi << 63) | (V.lo >> 1);                                  \
      V.hi = (V.hi >> 1) ^ T;                                             \
    } else {                                                              \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));              \
      V.lo = (V.hi << 63) | (V.lo >> 1);                                  \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                           \
    }                                                                     \
  } while (0)


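/* gcm_init_4bit precomputes the table used by the 4-bit windowed (Shoup)
 * GHASH implementations below: Htable[i] is the hash key H multiplied by
 * the 4-bit polynomial corresponding to index i, so that multiplication
 * by H can later be assembled one nibble of Xi at a time. */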
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* ARM assembler expects specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}

#if !defined(GHASH_ASM)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

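/* gcm_gmult_4bit computes Xi = Xi * H using the table built by
 * gcm_init_4bit. Xi is processed one nibble at a time, starting from the
 * last byte; for each nibble the running product Z is shifted right by
 * four bits and the matching Htable entry is XORed in, while rem_4bit
 * supplies the reduction terms for the four bits shifted out of the low
 * end (packed into the top 16 bits of a size_t by PACK above). */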
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed gcm_mult_4bit; see CRYPTO_gcm128_[en|de]crypt for details.
 * Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as a reference
 * and a placeholder for possible future non-trivial optimizations. */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate cache
 * thrashing: the idea is to hash data while it is still in the L1 cache
 * after the encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

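/* A minimal usage sketch (illustrative only; error handling omitted). It
 * assumes an AES key schedule |aes_key| prepared elsewhere with
 * AES_set_encrypt_key and uses AES_encrypt as the block function:
 *
 *   GCM128_CONTEXT *gcm =
 *       CRYPTO_gcm128_new(&aes_key, (block128_f)AES_encrypt);
 *   CRYPTO_gcm128_setiv(gcm, iv, iv_len);
 *   CRYPTO_gcm128_aad(gcm, aad, aad_len);
 *   CRYPTO_gcm128_encrypt(gcm, plaintext, ciphertext, plaintext_len);
 *   CRYPTO_gcm128_tag(gcm, tag, 16);
 *   CRYPTO_gcm128_release(gcm);
 */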
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;
  ctx->key = key;

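  /* The hash key H is the encryption of the all-zero block; ctx->H was
   * zeroed by the memset above. */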
  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (CRYPTO_is_NEON_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#endif
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

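  /* Per the GCM spec, a 96-bit IV is used directly as the initial counter
   * block with the 32-bit counter set to 1. Any other IV length is first
   * hashed: Y0 = GHASH(IV || 0-padding || 64-bit length of IV in bits). */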
  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

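  /* AAD may only be supplied before any plaintext or ciphertext has been
   * processed; its total length is capped at 2^61 bytes below. */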
  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (OPENSSL_U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t)-16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

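  /* NIST SP 800-38D caps the plaintext of a single GCM invocation at
   * 2^39 - 256 bits, i.e. 2^36 - 32 bytes, which is what the check below
   * enforces. */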
  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t)-16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t)-16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

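/* CRYPTO_gcm128_encrypt_ctr32 behaves like CRYPTO_gcm128_encrypt, but the
 * bulk counter-mode work is delegated to |stream|, a ctr128_f that encrypts
 * whole 16-byte blocks and increments only the low (big-endian) 32 bits of
 * the counter block ctx->Yi. */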
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t)-16))) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t)-16))) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

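  /* Fold the bit lengths of the AAD and ciphertext into the hash, apply one
   * final multiplication by H, and mask the result with EK0 = E(K, Y0) to
   * form the authentication tag. */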
  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif