1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50 #define OPENSSL_FIPSAPI
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 # define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as single 32-bit word accesses combined with
 * BSWAP4, because alignment is ensured.
 *
 * PUTU32 is wrapped in parentheses so that the assignment it expands to
 * behaves as one expression wherever the macro is used.
 */
#undef	GETU32
#define	GETU32(p)	BSWAP4(*(const u32 *)(p))
#undef	PUTU32
#define	PUTU32(p,v)	(*(u32 *)(p) = BSWAP4(v))
#endif
70
/*
 * PACK(s) moves the 16-bit constant s into the 16 most significant bits
 * of a size_t.
 */
#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * REDUCE1BIT(V) shifts the 128-bit value V (an lvalue with u64 fields
 * .hi and .lo) right by one bit. If the bit shifted out of V.lo was set,
 * the constant 0xe1 is XORed into the top byte of V.hi.
 *
 * Two code paths are provided, selected by sizeof(size_t), which compute
 * the same result using a 64-bit or a 32-bit mask respectively.
 *
 * V is evaluated several times, so it must not have side effects. It is
 * parenthesized on every use so that arguments such as *ptr expand
 * correctly.
 */
#define	REDUCE1BIT(V)	do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
		(V).lo  = ((V).hi<<63)|((V).lo>>1); \
		(V).hi  = ((V).hi>>1 )^T; \
	} \
	else { \
		u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
		(V).lo  = ((V).hi<<63)|((V).lo>>1); \
		(V).hi  = ((V).hi>>1 )^((u64)T<<32); \
	} \
} while(0)
84
/*
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 selects the lookup-table-driven implementations referred to
 * as "Shoup's" in the GCM specification. In other words, OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations. Why?
 * In the non-"Shoup's" case the memory access pattern is segmented in such
 * a manner that it's trivial to see that cache timing information can
 * reveal a fair portion of the intermediate hash value. Given that the
 * ciphertext is always available to an attacker, it's possible for them
 * to attempt to deduce the secret parameter H and, if successful, tamper
 * with messages [which is nothing but trivial in CTR mode]. In the
 * "Shoup's" case it's not as trivial, but there is no reason to believe
 * that it's resistant to cache-timing attacks. And the thing about the
 * "8-bit" implementation is that it consumes 16 (sixteen) times more
 * memory, 4KB per individual key + 1KB shared. Well, on the pro side it
 * should be twice as fast as the "4-bit" version. And for gcc-generated
 * x86[_64] code, the "8-bit" version was observed to run ~75% faster,
 * closer to 100% for commercial compilers... Yet the "4-bit" procedure is
 * preferred, because it's believed to provide a better
 * security-performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   the performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
119 #if TABLE_BITS==8
120
/*
 * Fill the 256-entry table used by gcm_gmult_8bit from the hash key H.
 *
 * Htable[0] is zero and Htable[128] is H itself (H[0] is the high word,
 * H[1] the low word). Each further power-of-two slot (64, 32, ..., 1)
 * holds the previous one after another REDUCE1BIT step. Every remaining
 * slot is the XOR of the power-of-two slot for its top set bit and the
 * already-filled slot for the rest of its bits.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
	u128 cur;
	int  bit, base, off;

	cur.hi = H[0];
	cur.lo = H[1];

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	Htable[128]  = cur;

	/* power-of-two slots: 64, 32, 16, 8, 4, 2, 1 */
	bit = 64;
	while (bit > 0) {
		REDUCE1BIT(cur);
		Htable[bit] = cur;
		bit >>= 1;
	}

	/* composite slots: Htable[base+off] = Htable[base] ^ Htable[off] */
	for (base = 2; base < 256; base <<= 1) {
		const u128 top = Htable[base];

		for (off = 1; off < base; ++off) {
			Htable[base+off].hi = top.hi ^ Htable[off].hi;
			Htable[base+off].lo = top.lo ^ Htable[off].lo;
		}
	}
}
144
/*
 * Multiply Xi by the hash key using the 256-entry table produced by
 * gcm_init_8bit, and store the product back into Xi.
 *
 * Xi is processed as 16 bytes, from byte 15 down to byte 0. For every
 * byte the matching table entry is XORed into the accumulator; between
 * bytes the accumulator is shifted right by 8 bits and the byte that
 * fell off the low end selects a correction from rem_8bit that is XORed
 * into the top of the accumulator.
 *
 * The result is written to Xi in big-endian byte order on either kind of
 * host.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
	u128 acc = { 0, 0};
	const u8 *bytes = (const u8 *)Xi;
	int idx = 15;
	size_t spill;
	const union { long one; char little; } is_endian = {1};
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

	for (;;) {
		const u128 *entry = &Htable[bytes[idx]];

		acc.hi ^= entry->hi;
		acc.lo ^= entry->lo;

		if (idx == 0)
			break;
		--idx;

		/* shift right by one byte and fold the dropped byte back in */
		spill  = (size_t)acc.lo&0xff;
		acc.lo = (acc.hi<<56)|(acc.lo>>8);
		acc.hi >>= 8;
		acc.hi ^= (sizeof(size_t)==8) ? (u64)rem_8bit[spill]
					      : ((u64)rem_8bit[spill]<<32);
	}

	if (!is_endian.little) {
		Xi[0] = acc.hi;
		Xi[1] = acc.lo;
	}
	else {
#ifdef BSWAP8
		Xi[0] = BSWAP8(acc.hi);
		Xi[1] = BSWAP8(acc.lo);
#else
		u8 *dst = (u8 *)Xi;
		u32 word;

		word = (u32)(acc.hi>>32);	PUTU32(dst,word);
		word = (u32)(acc.hi);		PUTU32(dst+4,word);
		word = (u32)(acc.lo>>32);	PUTU32(dst+8,word);
		word = (u32)(acc.lo);		PUTU32(dst+12,word);
#endif
	}
}
/*
 * GCM_MUL(ctx,Xi): the second argument names the ctx member to multiply
 * (it is substituted for "Xi" in the expansion).
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
253
254 #elif TABLE_BITS==4
255
/*
 * Fill the 16-entry table used by the 4-bit GHASH routines from the hash
 * key H (H[0] is the high word, H[1] the low word).
 *
 * Htable[0] is zero, Htable[8] is H, and Htable[4], Htable[2], Htable[1]
 * are obtained by successive REDUCE1BIT steps. All other entries are the
 * XOR of the power-of-two entry for their top set bit and the entry for
 * their remaining bits. With OPENSSL_SMALL_FOOTPRINT this is done with
 * loops, otherwise it is written out entry by entry.
 *
 * When built with GHASH_ASM for ARM, the two 64-bit halves of every entry
 * are additionally swapped (and, on big-endian hosts, the 32-bit halves
 * within each of them as well) to match the dword order the ARM assembler
 * expects.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
	u128 cur;
#if defined(OPENSSL_SMALL_FOOTPRINT)
	int  bit, base, off;
#endif

	cur.hi = H[0];
	cur.lo = H[1];

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	Htable[8]    = cur;

#if defined(OPENSSL_SMALL_FOOTPRINT)
	for (bit = 4; bit > 0; bit >>= 1) {
		REDUCE1BIT(cur);
		Htable[bit] = cur;
	}

	for (base = 2; base < 16; base <<= 1) {
		cur = Htable[base];
		for (off = 1; off < base; ++off) {
			Htable[base+off].hi = cur.hi ^ Htable[off].hi;
			Htable[base+off].lo = cur.lo ^ Htable[off].lo;
		}
	}
#else
	REDUCE1BIT(cur);
	Htable[4] = cur;
	REDUCE1BIT(cur);
	Htable[2] = cur;
	REDUCE1BIT(cur);
	Htable[1] = cur;

	/* cur == Htable[1] here */
	Htable[3].hi  = cur.hi ^ Htable[2].hi;
	Htable[3].lo  = cur.lo ^ Htable[2].lo;

	cur = Htable[4];
	Htable[5].hi  = cur.hi ^ Htable[1].hi;
	Htable[5].lo  = cur.lo ^ Htable[1].lo;
	Htable[6].hi  = cur.hi ^ Htable[2].hi;
	Htable[6].lo  = cur.lo ^ Htable[2].lo;
	Htable[7].hi  = cur.hi ^ Htable[3].hi;
	Htable[7].lo  = cur.lo ^ Htable[3].lo;

	cur = Htable[8];
	Htable[9].hi  = cur.hi ^ Htable[1].hi;
	Htable[9].lo  = cur.lo ^ Htable[1].lo;
	Htable[10].hi = cur.hi ^ Htable[2].hi;
	Htable[10].lo = cur.lo ^ Htable[2].lo;
	Htable[11].hi = cur.hi ^ Htable[3].hi;
	Htable[11].lo = cur.lo ^ Htable[3].lo;
	Htable[12].hi = cur.hi ^ Htable[4].hi;
	Htable[12].lo = cur.lo ^ Htable[4].lo;
	Htable[13].hi = cur.hi ^ Htable[5].hi;
	Htable[13].lo = cur.lo ^ Htable[5].lo;
	Htable[14].hi = cur.hi ^ Htable[6].hi;
	Htable[14].lo = cur.lo ^ Htable[6].lo;
	Htable[15].hi = cur.hi ^ Htable[7].hi;
	Htable[15].lo = cur.lo ^ Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	/*
	 * ARM assembler expects specific dword order in Htable.
	 */
	{
	int k;
	const union { long one; char little; } is_endian = {1};

	for (k = 0; k < 16; ++k) {
		cur = Htable[k];
		if (is_endian.little) {
			Htable[k].hi = cur.lo;
			Htable[k].lo = cur.hi;
		}
		else {
			Htable[k].hi = cur.lo<<32|cur.lo>>32;
			Htable[k].lo = cur.hi<<32|cur.hi>>32;
		}
	}
	}
#endif
}
327
328 #ifndef GHASH_ASM
329 static const size_t rem_4bit[16] = {
330 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
/*
 * Multiply Xi by the hash key using the 16-entry table produced by
 * gcm_init_4bit, and store the product back into Xi.
 *
 * Xi is processed as 16 bytes from byte 15 down to byte 0, low nibble
 * first and then high nibble. Before each table entry (other than the
 * very first) is XORed in, the accumulator is shifted right by 4 bits and
 * the nibble that dropped off selects a correction from rem_4bit.
 *
 * The result is written to Xi in big-endian byte order on either kind of
 * host.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
	u128 acc;
	const u8 *bytes = (const u8 *)Xi;
	int idx = 15;
	size_t spill, lo, hi;
	const union { long one; char little; } is_endian = {1};

	lo  = bytes[15];
	hi  = lo>>4;
	lo &= 0xf;

	acc.hi = Htable[lo].hi;
	acc.lo = Htable[lo].lo;

	for (;;) {
		/* high nibble of the current byte */
		spill  = (size_t)acc.lo&0xf;
		acc.lo = (acc.hi<<60)|(acc.lo>>4);
		acc.hi >>= 4;
		acc.hi ^= (sizeof(size_t)==8) ? (u64)rem_4bit[spill]
					      : ((u64)rem_4bit[spill]<<32);
		acc.hi ^= Htable[hi].hi;
		acc.lo ^= Htable[hi].lo;

		if (--idx < 0)
			break;

		/* low nibble of the next byte */
		lo  = bytes[idx];
		hi  = lo>>4;
		lo &= 0xf;

		spill  = (size_t)acc.lo&0xf;
		acc.lo = (acc.hi<<60)|(acc.lo>>4);
		acc.hi >>= 4;
		acc.hi ^= (sizeof(size_t)==8) ? (u64)rem_4bit[spill]
					      : ((u64)rem_4bit[spill]<<32);
		acc.hi ^= Htable[lo].hi;
		acc.lo ^= Htable[lo].lo;
	}

	if (!is_endian.little) {
		Xi[0] = acc.hi;
		Xi[1] = acc.lo;
	}
	else {
#ifdef BSWAP8
		Xi[0] = BSWAP8(acc.hi);
		Xi[1] = BSWAP8(acc.lo);
#else
		u8 *dst = (u8 *)Xi;
		u32 word;

		word = (u32)(acc.hi>>32);	PUTU32(dst,word);
		word = (u32)(acc.hi);		PUTU32(dst+4,word);
		word = (u32)(acc.lo>>32);	PUTU32(dst+8,word);
		word = (u32)(acc.lo);		PUTU32(dst+12,word);
#endif
	}
}
397
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
399 /*
400 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
405 */
gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 * inp,size_t len)406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407 const u8 *inp,size_t len)
408 {
409 u128 Z;
410 int cnt;
411 size_t rem, nlo, nhi;
412 const union { long one; char little; } is_endian = {1};
413
414 #if 1
415 do {
416 cnt = 15;
417 nlo = ((const u8 *)Xi)[15];
418 nlo ^= inp[15];
419 nhi = nlo>>4;
420 nlo &= 0xf;
421
422 Z.hi = Htable[nlo].hi;
423 Z.lo = Htable[nlo].lo;
424
425 while (1) {
426 rem = (size_t)Z.lo&0xf;
427 Z.lo = (Z.hi<<60)|(Z.lo>>4);
428 Z.hi = (Z.hi>>4);
429 if (sizeof(size_t)==8)
430 Z.hi ^= rem_4bit[rem];
431 else
432 Z.hi ^= (u64)rem_4bit[rem]<<32;
433
434 Z.hi ^= Htable[nhi].hi;
435 Z.lo ^= Htable[nhi].lo;
436
437 if (--cnt<0) break;
438
439 nlo = ((const u8 *)Xi)[cnt];
440 nlo ^= inp[cnt];
441 nhi = nlo>>4;
442 nlo &= 0xf;
443
444 rem = (size_t)Z.lo&0xf;
445 Z.lo = (Z.hi<<60)|(Z.lo>>4);
446 Z.hi = (Z.hi>>4);
447 if (sizeof(size_t)==8)
448 Z.hi ^= rem_4bit[rem];
449 else
450 Z.hi ^= (u64)rem_4bit[rem]<<32;
451
452 Z.hi ^= Htable[nlo].hi;
453 Z.lo ^= Htable[nlo].lo;
454 }
455 #else
456 /*
457 * Extra 256+16 bytes per-key plus 512 bytes shared tables
458 * [should] give ~50% improvement... One could have PACK()-ed
459 * the rem_8bit even here, but the priority is to minimize
460 * cache footprint...
461 */
462 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464 static const unsigned short rem_8bit[256] = {
465 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
497 /*
498 * This pre-processing phase slows down procedure by approximately
499 * same time as it makes each loop spin faster. In other words
500 * single block performance is approximately same as straightforward
501 * "4-bit" implementation, and then it goes only faster...
502 */
503 for (cnt=0; cnt<16; ++cnt) {
504 Z.hi = Htable[cnt].hi;
505 Z.lo = Htable[cnt].lo;
506 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507 Hshr4[cnt].hi = (Z.hi>>4);
508 Hshl4[cnt] = (u8)(Z.lo<<4);
509 }
510
511 do {
512 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513 nlo = ((const u8 *)Xi)[cnt];
514 nlo ^= inp[cnt];
515 nhi = nlo>>4;
516 nlo &= 0xf;
517
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
520
521 rem = (size_t)Z.lo&0xff;
522
523 Z.lo = (Z.hi<<56)|(Z.lo>>8);
524 Z.hi = (Z.hi>>8);
525
526 Z.hi ^= Hshr4[nhi].hi;
527 Z.lo ^= Hshr4[nhi].lo;
528 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
529 }
530
531 nlo = ((const u8 *)Xi)[0];
532 nlo ^= inp[0];
533 nhi = nlo>>4;
534 nlo &= 0xf;
535
536 Z.hi ^= Htable[nlo].hi;
537 Z.lo ^= Htable[nlo].lo;
538
539 rem = (size_t)Z.lo&0xf;
540
541 Z.lo = (Z.hi<<60)|(Z.lo>>4);
542 Z.hi = (Z.hi>>4);
543
544 Z.hi ^= Htable[nhi].hi;
545 Z.lo ^= Htable[nhi].lo;
546 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
547 #endif
548
549 if (is_endian.little) {
550 #ifdef BSWAP8
551 Xi[0] = BSWAP8(Z.hi);
552 Xi[1] = BSWAP8(Z.lo);
553 #else
554 u8 *p = (u8 *)Xi;
555 u32 v;
556 v = (u32)(Z.hi>>32); PUTU32(p,v);
557 v = (u32)(Z.hi); PUTU32(p+4,v);
558 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559 v = (u32)(Z.lo); PUTU32(p+12,v);
560 #endif
561 }
562 else {
563 Xi[0] = Z.hi;
564 Xi[1] = Z.lo;
565 }
566 } while (inp+=16, len-=16);
567 }
568 #endif
569 #else
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572 #endif
573
/*
 * GCM_MUL(ctx,Xi) multiplies one field of the context by the hash key.
 * The second argument is the NAME of the ctx member to operate on (it is
 * substituted for "Xi" in the expansion, so GCM_MUL(ctx,Yi) works on
 * ctx->Yi). ctx is parenthesized, as in GHASH below, so that any pointer
 * expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH(ctx,in,len) folds len bytes at in into ctx->Xi. */
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,(in),(len))
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 * thrashing effect. In other words idea is to hash data while it's
 * still in L1 cache after encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif
582
583 #else /* TABLE_BITS */
584
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
586 {
587 u128 V,Z = { 0,0 };
588 long X;
589 int i,j;
590 const long *xi = (const long *)Xi;
591 const union { long one; char little; } is_endian = {1};
592
593 V.hi = H[0]; /* H is in host byte order, no byte swapping */
594 V.lo = H[1];
595
596 for (j=0; j<16/sizeof(long); ++j) {
597 if (is_endian.little) {
598 if (sizeof(long)==8) {
599 #ifdef BSWAP8
600 X = (long)(BSWAP8(xi[j]));
601 #else
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604 #endif
605 }
606 else {
607 const u8 *p = (const u8 *)(xi+j);
608 X = (long)GETU32(p);
609 }
610 }
611 else
612 X = xi[j];
613
614 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615 u64 M = (u64)(X>>(8*sizeof(long)-1));
616 Z.hi ^= V.hi&M;
617 Z.lo ^= V.lo&M;
618
619 REDUCE1BIT(V);
620 }
621 }
622
623 if (is_endian.little) {
624 #ifdef BSWAP8
625 Xi[0] = BSWAP8(Z.hi);
626 Xi[1] = BSWAP8(Z.lo);
627 #else
628 u8 *p = (u8 *)Xi;
629 u32 v;
630 v = (u32)(Z.hi>>32); PUTU32(p,v);
631 v = (u32)(Z.hi); PUTU32(p+4,v);
632 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633 v = (u32)(Z.lo); PUTU32(p+12,v);
634 #endif
635 }
636 else {
637 Xi[0] = Z.hi;
638 Xi[1] = Z.lo;
639 }
640 }
641 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
642
643 #endif
644
/*
 * Platform-specific GHASH routines, declared only for the 4-bit table
 * layout with GHASH_ASM. Defining GCM_FUNCREF_4BIT makes GCM_MUL/GHASH
 * dispatch through per-function pointer variables instead of calling
 * gcm_gmult_4bit/gcm_ghash_4bit directly.
 */
#if	TABLE_BITS==4 && defined(GHASH_ASM)
# if	!defined(I386_ONLY) && \
	(defined(__i386)	|| defined(__i386__)	|| \
	 defined(__x86_64)	|| defined(__x86_64__)	|| \
	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

/* carry-less multiplication variants */
void gcm_init_clmul(u128 Htable[16],
			const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],
			const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2],
			const u128 Htable[16],
			const u8 *inp,
			size_t len);

#  if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
/* 32-bit x86 only: MMX and plain integer variants */
void gcm_gmult_4bit_mmx(u64 Xi[2],
			const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2],
			const u128 Htable[16],
			const u8 *inp,
			size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2],
			const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],
			const u128 Htable[16],
			const u8 *inp,
			size_t len);
#  endif
# elif defined(__arm__) || defined(__arm)
#  include "arm_arch.h"
#  if __ARM_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
/* NEON variants */
void gcm_gmult_neon(u64 Xi[2],
			const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],
			const u128 Htable[16],
			const u8 *inp,
			size_t len);
#  endif
# endif
#endif
676
/*
 * When GCM_FUNCREF_4BIT is defined, GCM_MUL and GHASH call through the
 * function pointers gcm_gmult_p / gcm_ghash_p, which every function using
 * these macros must have in scope as local variables.
 *
 * As before, the second argument of GCM_MUL names the ctx member to
 * operate on. Macro arguments are parenthesized so that any expression
 * may be passed.
 */
#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)	(*gcm_gmult_p)((ctx)->Xi.u,(ctx)->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)	(*gcm_ghash_p)((ctx)->Xi.u,(ctx)->Htable,(in),(len))
# endif
#endif
685
686 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
687 {
688 const union { long one; char little; } is_endian = {1};
689
690 memset(ctx,0,sizeof(*ctx));
691 ctx->block = block;
692 ctx->key = key;
693
694 (*block)(ctx->H.c,ctx->H.c,key);
695
696 if (is_endian.little) {
697 /* H is stored in host byte order */
698 #ifdef BSWAP8
699 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
700 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
701 #else
702 u8 *p = ctx->H.c;
703 u64 hi,lo;
704 hi = (u64)GETU32(p) <<32|GETU32(p+4);
705 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
706 ctx->H.u[0] = hi;
707 ctx->H.u[1] = lo;
708 #endif
709 }
710
711 #if TABLE_BITS==8
712 gcm_init_8bit(ctx->Htable,ctx->H.u);
713 #elif TABLE_BITS==4
714 # if defined(GHASH_ASM_X86_OR_64)
715 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
716 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
717 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
718 gcm_init_clmul(ctx->Htable,ctx->H.u);
719 ctx->gmult = gcm_gmult_clmul;
720 ctx->ghash = gcm_ghash_clmul;
721 return;
722 }
723 # endif
724 gcm_init_4bit(ctx->Htable,ctx->H.u);
725 # if defined(GHASH_ASM_X86) /* x86 only */
726 # if defined(OPENSSL_IA32_SSE2)
727 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
728 # else
729 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
730 # endif
731 ctx->gmult = gcm_gmult_4bit_mmx;
732 ctx->ghash = gcm_ghash_4bit_mmx;
733 } else {
734 ctx->gmult = gcm_gmult_4bit_x86;
735 ctx->ghash = gcm_ghash_4bit_x86;
736 }
737 # else
738 ctx->gmult = gcm_gmult_4bit;
739 ctx->ghash = gcm_ghash_4bit;
740 # endif
741 # elif defined(GHASH_ASM_ARM)
742 if (OPENSSL_armcap_P & ARMV7_NEON) {
743 ctx->gmult = gcm_gmult_neon;
744 ctx->ghash = gcm_ghash_neon;
745 } else {
746 gcm_init_4bit(ctx->Htable,ctx->H.u);
747 ctx->gmult = gcm_gmult_4bit;
748 ctx->ghash = gcm_ghash_4bit;
749 }
750 # else
751 gcm_init_4bit(ctx->Htable,ctx->H.u);
752 # endif
753 #endif
754 }
755
756 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
757 {
758 const union { long one; char little; } is_endian = {1};
759 unsigned int ctr;
760 #ifdef GCM_FUNCREF_4BIT
761 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
762 #endif
763
764 ctx->Yi.u[0] = 0;
765 ctx->Yi.u[1] = 0;
766 ctx->Xi.u[0] = 0;
767 ctx->Xi.u[1] = 0;
768 ctx->len.u[0] = 0; /* AAD length */
769 ctx->len.u[1] = 0; /* message length */
770 ctx->ares = 0;
771 ctx->mres = 0;
772
773 if (len==12) {
774 memcpy(ctx->Yi.c,iv,12);
775 ctx->Yi.c[15]=1;
776 ctr=1;
777 }
778 else {
779 size_t i;
780 u64 len0 = len;
781
782 while (len>=16) {
783 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
784 GCM_MUL(ctx,Yi);
785 iv += 16;
786 len -= 16;
787 }
788 if (len) {
789 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
790 GCM_MUL(ctx,Yi);
791 }
792 len0 <<= 3;
793 if (is_endian.little) {
794 #ifdef BSWAP8
795 ctx->Yi.u[1] ^= BSWAP8(len0);
796 #else
797 ctx->Yi.c[8] ^= (u8)(len0>>56);
798 ctx->Yi.c[9] ^= (u8)(len0>>48);
799 ctx->Yi.c[10] ^= (u8)(len0>>40);
800 ctx->Yi.c[11] ^= (u8)(len0>>32);
801 ctx->Yi.c[12] ^= (u8)(len0>>24);
802 ctx->Yi.c[13] ^= (u8)(len0>>16);
803 ctx->Yi.c[14] ^= (u8)(len0>>8);
804 ctx->Yi.c[15] ^= (u8)(len0);
805 #endif
806 }
807 else
808 ctx->Yi.u[1] ^= len0;
809
810 GCM_MUL(ctx,Yi);
811
812 if (is_endian.little)
813 ctr = GETU32(ctx->Yi.c+12);
814 else
815 ctr = ctx->Yi.d[3];
816 }
817
818 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
819 ++ctr;
820 if (is_endian.little)
821 PUTU32(ctx->Yi.c+12,ctr);
822 else
823 ctx->Yi.d[3] = ctr;
824 }
825
826 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
827 {
828 size_t i;
829 unsigned int n;
830 u64 alen = ctx->len.u[0];
831 #ifdef GCM_FUNCREF_4BIT
832 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
833 # ifdef GHASH
834 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
835 const u8 *inp,size_t len) = ctx->ghash;
836 # endif
837 #endif
838
839 if (ctx->len.u[1]) return -2;
840
841 alen += len;
842 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
843 return -1;
844 ctx->len.u[0] = alen;
845
846 n = ctx->ares;
847 if (n) {
848 while (n && len) {
849 ctx->Xi.c[n] ^= *(aad++);
850 --len;
851 n = (n+1)%16;
852 }
853 if (n==0) GCM_MUL(ctx,Xi);
854 else {
855 ctx->ares = n;
856 return 0;
857 }
858 }
859
860 #ifdef GHASH
861 if ((i = (len&(size_t)-16))) {
862 GHASH(ctx,aad,i);
863 aad += i;
864 len -= i;
865 }
866 #else
867 while (len>=16) {
868 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
869 GCM_MUL(ctx,Xi);
870 aad += 16;
871 len -= 16;
872 }
873 #endif
874 if (len) {
875 n = (unsigned int)len;
876 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
877 }
878
879 ctx->ares = n;
880 return 0;
881 }
882
883 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
884 const unsigned char *in, unsigned char *out,
885 size_t len)
886 {
887 const union { long one; char little; } is_endian = {1};
888 unsigned int n, ctr;
889 size_t i;
890 u64 mlen = ctx->len.u[1];
891 block128_f block = ctx->block;
892 void *key = ctx->key;
893 #ifdef GCM_FUNCREF_4BIT
894 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
895 # ifdef GHASH
896 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
897 const u8 *inp,size_t len) = ctx->ghash;
898 # endif
899 #endif
900
901 #if 0
902 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
903 #endif
904 mlen += len;
905 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
906 return -1;
907 ctx->len.u[1] = mlen;
908
909 if (ctx->ares) {
910 /* First call to encrypt finalizes GHASH(AAD) */
911 GCM_MUL(ctx,Xi);
912 ctx->ares = 0;
913 }
914
915 if (is_endian.little)
916 ctr = GETU32(ctx->Yi.c+12);
917 else
918 ctr = ctx->Yi.d[3];
919
920 n = ctx->mres;
921 #if !defined(OPENSSL_SMALL_FOOTPRINT)
922 if (16%sizeof(size_t) == 0) do { /* always true actually */
923 if (n) {
924 while (n && len) {
925 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
926 --len;
927 n = (n+1)%16;
928 }
929 if (n==0) GCM_MUL(ctx,Xi);
930 else {
931 ctx->mres = n;
932 return 0;
933 }
934 }
935 #if defined(STRICT_ALIGNMENT)
936 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
937 break;
938 #endif
939 #if defined(GHASH) && defined(GHASH_CHUNK)
940 while (len>=GHASH_CHUNK) {
941 size_t j=GHASH_CHUNK;
942
943 while (j) {
944 (*block)(ctx->Yi.c,ctx->EKi.c,key);
945 ++ctr;
946 if (is_endian.little)
947 PUTU32(ctx->Yi.c+12,ctr);
948 else
949 ctx->Yi.d[3] = ctr;
950 for (i=0; i<16; i+=sizeof(size_t))
951 *(size_t *)(out+i) =
952 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
953 out += 16;
954 in += 16;
955 j -= 16;
956 }
957 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
958 len -= GHASH_CHUNK;
959 }
960 if ((i = (len&(size_t)-16))) {
961 size_t j=i;
962
963 while (len>=16) {
964 (*block)(ctx->Yi.c,ctx->EKi.c,key);
965 ++ctr;
966 if (is_endian.little)
967 PUTU32(ctx->Yi.c+12,ctr);
968 else
969 ctx->Yi.d[3] = ctr;
970 for (i=0; i<16; i+=sizeof(size_t))
971 *(size_t *)(out+i) =
972 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
973 out += 16;
974 in += 16;
975 len -= 16;
976 }
977 GHASH(ctx,out-j,j);
978 }
979 #else
980 while (len>=16) {
981 (*block)(ctx->Yi.c,ctx->EKi.c,key);
982 ++ctr;
983 if (is_endian.little)
984 PUTU32(ctx->Yi.c+12,ctr);
985 else
986 ctx->Yi.d[3] = ctr;
987 for (i=0; i<16; i+=sizeof(size_t))
988 *(size_t *)(ctx->Xi.c+i) ^=
989 *(size_t *)(out+i) =
990 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
991 GCM_MUL(ctx,Xi);
992 out += 16;
993 in += 16;
994 len -= 16;
995 }
996 #endif
997 if (len) {
998 (*block)(ctx->Yi.c,ctx->EKi.c,key);
999 ++ctr;
1000 if (is_endian.little)
1001 PUTU32(ctx->Yi.c+12,ctr);
1002 else
1003 ctx->Yi.d[3] = ctr;
1004 while (len--) {
1005 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1006 ++n;
1007 }
1008 }
1009
1010 ctx->mres = n;
1011 return 0;
1012 } while(0);
1013 #endif
1014 for (i=0;i<len;++i) {
1015 if (n==0) {
1016 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1017 ++ctr;
1018 if (is_endian.little)
1019 PUTU32(ctx->Yi.c+12,ctr);
1020 else
1021 ctx->Yi.d[3] = ctr;
1022 }
1023 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1024 n = (n+1)%16;
1025 if (n==0)
1026 GCM_MUL(ctx,Xi);
1027 }
1028
1029 ctx->mres = n;
1030 return 0;
1031 }
1032
1033 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1034 const unsigned char *in, unsigned char *out,
1035 size_t len)
1036 {
1037 const union { long one; char little; } is_endian = {1};
1038 unsigned int n, ctr;
1039 size_t i;
1040 u64 mlen = ctx->len.u[1];
1041 block128_f block = ctx->block;
1042 void *key = ctx->key;
1043 #ifdef GCM_FUNCREF_4BIT
1044 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1045 # ifdef GHASH
1046 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1047 const u8 *inp,size_t len) = ctx->ghash;
1048 # endif
1049 #endif
1050
1051 mlen += len;
1052 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1053 return -1;
1054 ctx->len.u[1] = mlen;
1055
1056 if (ctx->ares) {
1057 /* First call to decrypt finalizes GHASH(AAD) */
1058 GCM_MUL(ctx,Xi);
1059 ctx->ares = 0;
1060 }
1061
1062 if (is_endian.little)
1063 ctr = GETU32(ctx->Yi.c+12);
1064 else
1065 ctr = ctx->Yi.d[3];
1066
1067 n = ctx->mres;
1068 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1069 if (16%sizeof(size_t) == 0) do { /* always true actually */
1070 if (n) {
1071 while (n && len) {
1072 u8 c = *(in++);
1073 *(out++) = c^ctx->EKi.c[n];
1074 ctx->Xi.c[n] ^= c;
1075 --len;
1076 n = (n+1)%16;
1077 }
1078 if (n==0) GCM_MUL (ctx,Xi);
1079 else {
1080 ctx->mres = n;
1081 return 0;
1082 }
1083 }
1084 #if defined(STRICT_ALIGNMENT)
1085 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1086 break;
1087 #endif
1088 #if defined(GHASH) && defined(GHASH_CHUNK)
1089 while (len>=GHASH_CHUNK) {
1090 size_t j=GHASH_CHUNK;
1091
1092 GHASH(ctx,in,GHASH_CHUNK);
1093 while (j) {
1094 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1095 ++ctr;
1096 if (is_endian.little)
1097 PUTU32(ctx->Yi.c+12,ctr);
1098 else
1099 ctx->Yi.d[3] = ctr;
1100 for (i=0; i<16; i+=sizeof(size_t))
1101 *(size_t *)(out+i) =
1102 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1103 out += 16;
1104 in += 16;
1105 j -= 16;
1106 }
1107 len -= GHASH_CHUNK;
1108 }
1109 if ((i = (len&(size_t)-16))) {
1110 GHASH(ctx,in,i);
1111 while (len>=16) {
1112 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1113 ++ctr;
1114 if (is_endian.little)
1115 PUTU32(ctx->Yi.c+12,ctr);
1116 else
1117 ctx->Yi.d[3] = ctr;
1118 for (i=0; i<16; i+=sizeof(size_t))
1119 *(size_t *)(out+i) =
1120 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1121 out += 16;
1122 in += 16;
1123 len -= 16;
1124 }
1125 }
1126 #else
1127 while (len>=16) {
1128 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1129 ++ctr;
1130 if (is_endian.little)
1131 PUTU32(ctx->Yi.c+12,ctr);
1132 else
1133 ctx->Yi.d[3] = ctr;
1134 for (i=0; i<16; i+=sizeof(size_t)) {
1135 size_t c = *(size_t *)(in+i);
1136 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1137 *(size_t *)(ctx->Xi.c+i) ^= c;
1138 }
1139 GCM_MUL(ctx,Xi);
1140 out += 16;
1141 in += 16;
1142 len -= 16;
1143 }
1144 #endif
1145 if (len) {
1146 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1147 ++ctr;
1148 if (is_endian.little)
1149 PUTU32(ctx->Yi.c+12,ctr);
1150 else
1151 ctx->Yi.d[3] = ctr;
1152 while (len--) {
1153 u8 c = in[n];
1154 ctx->Xi.c[n] ^= c;
1155 out[n] = c^ctx->EKi.c[n];
1156 ++n;
1157 }
1158 }
1159
1160 ctx->mres = n;
1161 return 0;
1162 } while(0);
1163 #endif
1164 for (i=0;i<len;++i) {
1165 u8 c;
1166 if (n==0) {
1167 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1168 ++ctr;
1169 if (is_endian.little)
1170 PUTU32(ctx->Yi.c+12,ctr);
1171 else
1172 ctx->Yi.d[3] = ctr;
1173 }
1174 c = in[i];
1175 out[i] = c^ctx->EKi.c[n];
1176 ctx->Xi.c[n] ^= c;
1177 n = (n+1)%16;
1178 if (n==0)
1179 GCM_MUL(ctx,Xi);
1180 }
1181
1182 ctx->mres = n;
1183 return 0;
1184 }
1185
1186 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1187 const unsigned char *in, unsigned char *out,
1188 size_t len, ctr128_f stream)
1189 {
1190 const union { long one; char little; } is_endian = {1};
1191 unsigned int n, ctr;
1192 size_t i;
1193 u64 mlen = ctx->len.u[1];
1194 void *key = ctx->key;
1195 #ifdef GCM_FUNCREF_4BIT
1196 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1197 # ifdef GHASH
1198 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1199 const u8 *inp,size_t len) = ctx->ghash;
1200 # endif
1201 #endif
1202
1203 mlen += len;
1204 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1205 return -1;
1206 ctx->len.u[1] = mlen;
1207
1208 if (ctx->ares) {
1209 /* First call to encrypt finalizes GHASH(AAD) */
1210 GCM_MUL(ctx,Xi);
1211 ctx->ares = 0;
1212 }
1213
1214 if (is_endian.little)
1215 ctr = GETU32(ctx->Yi.c+12);
1216 else
1217 ctr = ctx->Yi.d[3];
1218
1219 n = ctx->mres;
1220 if (n) {
1221 while (n && len) {
1222 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1223 --len;
1224 n = (n+1)%16;
1225 }
1226 if (n==0) GCM_MUL(ctx,Xi);
1227 else {
1228 ctx->mres = n;
1229 return 0;
1230 }
1231 }
1232 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1233 while (len>=GHASH_CHUNK) {
1234 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1235 ctr += GHASH_CHUNK/16;
1236 if (is_endian.little)
1237 PUTU32(ctx->Yi.c+12,ctr);
1238 else
1239 ctx->Yi.d[3] = ctr;
1240 GHASH(ctx,out,GHASH_CHUNK);
1241 out += GHASH_CHUNK;
1242 in += GHASH_CHUNK;
1243 len -= GHASH_CHUNK;
1244 }
1245 #endif
1246 if ((i = (len&(size_t)-16))) {
1247 size_t j=i/16;
1248
1249 (*stream)(in,out,j,key,ctx->Yi.c);
1250 ctr += (unsigned int)j;
1251 if (is_endian.little)
1252 PUTU32(ctx->Yi.c+12,ctr);
1253 else
1254 ctx->Yi.d[3] = ctr;
1255 in += i;
1256 len -= i;
1257 #if defined(GHASH)
1258 GHASH(ctx,out,i);
1259 out += i;
1260 #else
1261 while (j--) {
1262 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1263 GCM_MUL(ctx,Xi);
1264 out += 16;
1265 }
1266 #endif
1267 }
1268 if (len) {
1269 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1270 ++ctr;
1271 if (is_endian.little)
1272 PUTU32(ctx->Yi.c+12,ctr);
1273 else
1274 ctx->Yi.d[3] = ctr;
1275 while (len--) {
1276 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1277 ++n;
1278 }
1279 }
1280
1281 ctx->mres = n;
1282 return 0;
1283 }
1284
1285 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1286 const unsigned char *in, unsigned char *out,
1287 size_t len,ctr128_f stream)
1288 {
1289 const union { long one; char little; } is_endian = {1};
1290 unsigned int n, ctr;
1291 size_t i;
1292 u64 mlen = ctx->len.u[1];
1293 void *key = ctx->key;
1294 #ifdef GCM_FUNCREF_4BIT
1295 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1296 # ifdef GHASH
1297 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1298 const u8 *inp,size_t len) = ctx->ghash;
1299 # endif
1300 #endif
1301
1302 mlen += len;
1303 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1304 return -1;
1305 ctx->len.u[1] = mlen;
1306
1307 if (ctx->ares) {
1308 /* First call to decrypt finalizes GHASH(AAD) */
1309 GCM_MUL(ctx,Xi);
1310 ctx->ares = 0;
1311 }
1312
1313 if (is_endian.little)
1314 ctr = GETU32(ctx->Yi.c+12);
1315 else
1316 ctr = ctx->Yi.d[3];
1317
1318 n = ctx->mres;
1319 if (n) {
1320 while (n && len) {
1321 u8 c = *(in++);
1322 *(out++) = c^ctx->EKi.c[n];
1323 ctx->Xi.c[n] ^= c;
1324 --len;
1325 n = (n+1)%16;
1326 }
1327 if (n==0) GCM_MUL (ctx,Xi);
1328 else {
1329 ctx->mres = n;
1330 return 0;
1331 }
1332 }
1333 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1334 while (len>=GHASH_CHUNK) {
1335 GHASH(ctx,in,GHASH_CHUNK);
1336 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1337 ctr += GHASH_CHUNK/16;
1338 if (is_endian.little)
1339 PUTU32(ctx->Yi.c+12,ctr);
1340 else
1341 ctx->Yi.d[3] = ctr;
1342 out += GHASH_CHUNK;
1343 in += GHASH_CHUNK;
1344 len -= GHASH_CHUNK;
1345 }
1346 #endif
1347 if ((i = (len&(size_t)-16))) {
1348 size_t j=i/16;
1349
1350 #if defined(GHASH)
1351 GHASH(ctx,in,i);
1352 #else
1353 while (j--) {
1354 size_t k;
1355 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1356 GCM_MUL(ctx,Xi);
1357 in += 16;
1358 }
1359 j = i/16;
1360 in -= i;
1361 #endif
1362 (*stream)(in,out,j,key,ctx->Yi.c);
1363 ctr += (unsigned int)j;
1364 if (is_endian.little)
1365 PUTU32(ctx->Yi.c+12,ctr);
1366 else
1367 ctx->Yi.d[3] = ctr;
1368 out += i;
1369 in += i;
1370 len -= i;
1371 }
1372 if (len) {
1373 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1374 ++ctr;
1375 if (is_endian.little)
1376 PUTU32(ctx->Yi.c+12,ctr);
1377 else
1378 ctx->Yi.d[3] = ctr;
1379 while (len--) {
1380 u8 c = in[n];
1381 ctx->Xi.c[n] ^= c;
1382 out[n] = c^ctx->EKi.c[n];
1383 ++n;
1384 }
1385 }
1386
1387 ctx->mres = n;
1388 return 0;
1389 }
1390
1391 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1392 size_t len)
1393 {
1394 const union { long one; char little; } is_endian = {1};
1395 u64 alen = ctx->len.u[0]<<3;
1396 u64 clen = ctx->len.u[1]<<3;
1397 #ifdef GCM_FUNCREF_4BIT
1398 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1399 #endif
1400
1401 if (ctx->mres || ctx->ares)
1402 GCM_MUL(ctx,Xi);
1403
1404 if (is_endian.little) {
1405 #ifdef BSWAP8
1406 alen = BSWAP8(alen);
1407 clen = BSWAP8(clen);
1408 #else
1409 u8 *p = ctx->len.c;
1410
1411 ctx->len.u[0] = alen;
1412 ctx->len.u[1] = clen;
1413
1414 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1415 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1416 #endif
1417 }
1418
1419 ctx->Xi.u[0] ^= alen;
1420 ctx->Xi.u[1] ^= clen;
1421 GCM_MUL(ctx,Xi);
1422
1423 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1424 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1425
1426 if (tag && len<=sizeof(ctx->Xi))
1427 return memcmp(ctx->Xi.c,tag,len);
1428 else
1429 return -1;
1430 }
1431
1432 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1433 {
1434 CRYPTO_gcm128_finish(ctx, NULL, 0);
1435 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1436 }
1437
1438 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1439 {
1440 GCM128_CONTEXT *ret;
1441
1442 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1443 CRYPTO_gcm128_init(ret,key,block);
1444
1445 return ret;
1446 }
1447
1448 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1449 {
1450 if (ctx) {
1451 OPENSSL_cleanse(ctx,sizeof(*ctx));
1452 OPENSSL_free(ctx);
1453 }
1454 }
1455
1456 #if defined(SELFTEST)
1457 #include <stdio.h>
1458 #include <openssl/aes.h>
1459
1460 /* Test Case 1 */
1461 static const u8 K1[16],
1462 *P1=NULL,
1463 *A1=NULL,
1464 IV1[12],
1465 *C1=NULL,
1466 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1467
1468 /* Test Case 2 */
1469 #define K2 K1
1470 #define A2 A1
1471 #define IV2 IV1
1472 static const u8 P2[16],
1473 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1474 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1475
1476 /* Test Case 3 */
1477 #define A3 A2
1478 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1479 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1480 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1481 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1482 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1483 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1484 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1485 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1486 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1487 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1488 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1489
1490 /* Test Case 4 */
1491 #define K4 K3
1492 #define IV4 IV3
1493 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1494 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1495 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1496 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1497 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1498 0xab,0xad,0xda,0xd2},
1499 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1500 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1501 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1502 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1503 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1504
1505 /* Test Case 5 */
1506 #define K5 K4
1507 #define P5 P4
1508 #define A5 A4
1509 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1510 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1511 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1512 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1513 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1514 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1515
1516 /* Test Case 6 */
1517 #define K6 K5
1518 #define P6 P5
1519 #define A6 A5
1520 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1521 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1522 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1523 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1524 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1525 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1526 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1527 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1528 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1529
1530 /* Test Case 7 */
1531 static const u8 K7[24],
1532 *P7=NULL,
1533 *A7=NULL,
1534 IV7[12],
1535 *C7=NULL,
1536 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1537
1538 /* Test Case 8 */
1539 #define K8 K7
1540 #define IV8 IV7
1541 #define A8 A7
1542 static const u8 P8[16],
1543 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1544 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1545
1546 /* Test Case 9 */
1547 #define A9 A8
1548 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1549 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1550 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1551 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1552 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1553 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1554 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1555 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1556 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1557 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1558 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1559 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1560
1561 /* Test Case 10 */
1562 #define K10 K9
1563 #define IV10 IV9
1564 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1565 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1566 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1567 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1568 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1569 0xab,0xad,0xda,0xd2},
1570 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1571 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1572 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1573 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1574 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1575
1576 /* Test Case 11 */
1577 #define K11 K10
1578 #define P11 P10
1579 #define A11 A10
1580 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1581 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1582 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1583 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1584 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1585 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1586
1587 /* Test Case 12 */
1588 #define K12 K11
1589 #define P12 P11
1590 #define A12 A11
1591 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1592 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1593 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1594 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1595 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1596 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1597 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1598 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1599 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1600
1601 /* Test Case 13 */
1602 static const u8 K13[32],
1603 *P13=NULL,
1604 *A13=NULL,
1605 IV13[12],
1606 *C13=NULL,
1607 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1608
1609 /* Test Case 14 */
1610 #define K14 K13
1611 #define A14 A13
1612 static const u8 P14[16],
1613 IV14[12],
1614 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1615 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1616
1617 /* Test Case 15 */
1618 #define A15 A14
1619 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1620 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1621 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1622 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1623 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1624 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1625 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1626 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1627 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1628 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1629 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1630 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1631
1632 /* Test Case 16 */
1633 #define K16 K15
1634 #define IV16 IV15
1635 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1636 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1637 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1638 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1639 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1640 0xab,0xad,0xda,0xd2},
1641 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1642 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1643 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1644 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1645 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1646
1647 /* Test Case 17 */
1648 #define K17 K16
1649 #define P17 P16
1650 #define A17 A16
1651 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1652 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1653 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1654 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1655 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1656 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1657
1658 /* Test Case 18 */
1659 #define K18 K17
1660 #define P18 P17
1661 #define A18 A17
1662 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1663 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1664 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1665 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1666 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1667 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1668 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1669 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1670 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1671
/*
 * TEST_CASE(n) - run known-answer test number n in both directions.
 *
 * Expects `ctx`, `key` and `ret` to exist in the calling scope.  The key
 * schedule is set up from K<n> (key size taken from sizeof), then:
 *   1. encrypt: set IV<n>, feed A<n> and P<n> when present, and require
 *      both the tag T<n> and (when C<n> is present) the ciphertext to match;
 *   2. decrypt: set IV<n> again, feed A<n> and C<n> when present, and
 *      require the tag and (when P<n> is present) the plaintext to match.
 * Each failing direction increments `ret` and prints a message.
 */
#define TEST_CASE(n) do { \
    u8 text[sizeof(P##n)]; \
    AES_set_encrypt_key(K##n, sizeof(K##n) * 8, &key); \
    CRYPTO_gcm128_init(&ctx, &key, (block128_f)AES_encrypt); \
    \
    CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n)); \
    memset(text, 0, sizeof(text)); \
    if (A##n) { \
        CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n)); \
    } \
    if (P##n) { \
        CRYPTO_gcm128_encrypt(&ctx, P##n, text, sizeof(text)); \
    } \
    if (CRYPTO_gcm128_finish(&ctx, T##n, 16) != 0 || \
        (C##n && memcmp(text, C##n, sizeof(text)) != 0)) { \
        ret++; \
        printf ("encrypt test#%d failed.\n",n); \
    } \
    \
    CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n)); \
    memset(text, 0, sizeof(text)); \
    if (A##n) { \
        CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n)); \
    } \
    if (C##n) { \
        CRYPTO_gcm128_decrypt(&ctx, C##n, text, sizeof(text)); \
    } \
    if (CRYPTO_gcm128_finish(&ctx, T##n, 16) != 0 || \
        (P##n && memcmp(text, P##n, sizeof(text)) != 0)) { \
        ret++; \
        printf ("decrypt test#%d failed.\n",n); \
    } \
} while(0)
1691
1692 int main()
1693 {
1694 GCM128_CONTEXT ctx;
1695 AES_KEY key;
1696 int ret=0;
1697
1698 TEST_CASE(1);
1699 TEST_CASE(2);
1700 TEST_CASE(3);
1701 TEST_CASE(4);
1702 TEST_CASE(5);
1703 TEST_CASE(6);
1704 TEST_CASE(7);
1705 TEST_CASE(8);
1706 TEST_CASE(9);
1707 TEST_CASE(10);
1708 TEST_CASE(11);
1709 TEST_CASE(12);
1710 TEST_CASE(13);
1711 TEST_CASE(14);
1712 TEST_CASE(15);
1713 TEST_CASE(16);
1714 TEST_CASE(17);
1715 TEST_CASE(18);
1716
1717 #ifdef OPENSSL_CPUID_OBJ
1718 {
1719 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1720 union { u64 u; u8 c[1024]; } buf;
1721 int i;
1722
1723 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1724 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1725 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1726
1727 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1728 start = OPENSSL_rdtsc();
1729 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1730 gcm_t = OPENSSL_rdtsc() - start;
1731
1732 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1733 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1734 (block128_f)AES_encrypt);
1735 start = OPENSSL_rdtsc();
1736 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1737 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1738 (block128_f)AES_encrypt);
1739 ctr_t = OPENSSL_rdtsc() - start;
1740
1741 printf("%.2f-%.2f=%.2f\n",
1742 gcm_t/(double)sizeof(buf),
1743 ctr_t/(double)sizeof(buf),
1744 (gcm_t-ctr_t)/(double)sizeof(buf));
1745 #ifdef GHASH
1746 GHASH(&ctx,buf.c,sizeof(buf));
1747 start = OPENSSL_rdtsc();
1748 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1749 gcm_t = OPENSSL_rdtsc() - start;
1750 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1751 #endif
1752 }
1753 #endif
1754
1755 return ret;
1756 }
1757 #endif
1758