/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"

#if !defined(OPENSSL_NO_ASM) &&                         \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)
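
/* REDUCE1BIT implements the right-shift-and-reduce step from the GCM
 * specification (NIST SP 800-38D): V is shifted one bit to the right and,
 * if the bit shifted out was set, the reduction constant R = 0xe1 || 0^120
 * is folded into the top. A plain-C sketch of the same step, assuming a
 * value with |hi| and |lo| 64-bit halves:
 *
 *   int carry = v.lo & 1;
 *   v.lo = (v.hi << 63) | (v.lo >> 1);
 *   v.hi >>= 1;
 *   if (carry) {
 *     v.hi ^= UINT64_C(0xe100000000000000);
 *   }
 *
 * The macro's mask |0 - ((V).lo & 1)| computes the same result without a
 * data-dependent branch. */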

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}
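
/* At this point Htable[i] holds the GHASH product of H with the 4-bit
 * value i (interpreted in GCM's reflected bit order): entries 8, 4, 2 and
 * 1 are derived from H by repeated REDUCE1BIT steps, and every other
 * entry is an XOR combination of those, since multiplication in
 * GF(2^128) distributes over XOR. For example,
 * Htable[13] = Htable[8] ^ Htable[4] ^ Htable[1]. On 32-bit ARM the two
 * halves of each entry are additionally swapped above, matching the
 * layout the assembly implementation expects. */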

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

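/* rem_4bit[r] is the precomputed reduction for shifting the accumulator
 * right by four bits at once: r is the nibble that falls off the low end,
 * and the entry is its product with the reduction polynomial, packed into
 * the top 16 bits of a size_t by PACK. gcm_gmult_4bit below processes Xi
 * one nibble at a time, from byte 15 down to byte 0: each step shifts Z
 * right by four bits, folds in rem_4bit[rem], and XORs in the Htable
 * entry selected by the next nibble. */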
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

/* Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details. Compiler-generated code does not seem to give any performance
 * improvement over calling gcm_gmult_4bit once per block, at least not on
 * x86[_64]. It is kept here mostly as a reference and as a placeholder for
 * possible future non-trivial optimizations. */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
 * effects. The idea is to hash data while it is still in the L1 cache
 * after the encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                        size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of functions. */
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif

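/* With GCM_FUNCREF_4BIT defined, GCM_MUL and GHASH dispatch through the
 * |gmult| and |ghash| function pointers selected by CRYPTO_ghash_init;
 * each routine below loads them into local |gcm_gmult_p| and |gcm_ghash_p|
 * variables that the macros reference. Without it, the generic 4-bit C
 * implementations are called directly. */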
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16],
                       int *out_is_avx,
                       const uint8_t *gcm_key) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  /* H is stored in host byte order */
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block, int is_aesni_encrypt) {
  OPENSSL_memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  /* The hash subkey H is the block cipher applied to the all-zero block. */
  uint8_t gcm_key[16];
  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
  (*block)(gcm_key, gcm_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
                    gcm_key);

  ctx->use_aesni_gcm_crypt = (is_avx && is_aesni_encrypt) ? 1 : 0;
}
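
/* A minimal usage sketch for this API, assuming an expanded AES key |ks|
 * and an AES block function with the block128_f shape (the names
 * AES_encrypt and |ks| are illustrative, not part of this file):
 *
 *   GCM128_CONTEXT gcm;
 *   uint8_t tag[16];
 *   CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt, 0);
 *   CRYPTO_gcm128_setiv(&gcm, &ks, iv, iv_len);
 *   CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *   CRYPTO_gcm128_encrypt(&gcm, &ks, plaintext, ciphertext, len);
 *   CRYPTO_gcm128_tag(&gcm, tag, sizeof(tag));
 *
 * All AAD must be supplied before any plaintext; once encryption has
 * begun, CRYPTO_gcm128_aad fails and returns 0. */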

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    /* A 96-bit IV is used directly: Y0 = IV || 0^31 || 1. */
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    /* Other IV lengths are compressed with GHASH over the padded IV and
     * its bit length. */
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

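/* CRYPTO_gcm128_aad may be called several times to stream the additional
 * authenticated data; |ares| records how many bytes of a partial GHASH
 * block are buffered in Xi between calls. The total AAD length is capped
 * at 2^61 bytes (2^64 bits). */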
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  /* Process a whole number of blocks. */
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  /* Process the remainder. */
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

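/* The encrypt and decrypt paths below share one structure: finish any
 * partial counter block left over in |mres| from an earlier call, fall
 * back to a byte-at-a-time loop when the pointers are not word-aligned,
 * then run the bulk path in GHASH_CHUNK strides so ciphertext is hashed
 * while it is still in cache, and finally absorb the sub-block tail.
 * GHASH is always computed over the ciphertext, so encryption hashes its
 * output and decryption hashes its input. */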
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    /* |aesni_gcm_encrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    /* |aesni_gcm_decrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

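/* CRYPTO_gcm128_finish computes the final tag: it flushes any pending
 * partial block, XORs in the 64-bit AAD and ciphertext bit lengths,
 * performs one last GHASH multiplication, and masks the result with
 * EK0 = E(K, Y0). The caller's tag is compared with CRYPTO_memcmp so the
 * check runs in constant time. */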
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) && /* check FXSR bit */
         (ia32cap[1] & (1 << 1));    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif