/* ====================================================================
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#define OPENSSL_FIPSAPI

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

#ifndef MODES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
#include <assert.h>

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
#undef  GETU32
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
            u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
            V.lo  = (V.hi<<63)|(V.lo>>1); \
            V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
            u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
            V.lo  = (V.hi<<63)|(V.lo>>1); \
            V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
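
/*
 * A note on the math: REDUCE1BIT(V) computes V = V*x in GF(2^128) under
 * GCM's bit-reflected conventions. The value is shifted right by one
 * bit, and if a set bit falls off the low end, the reflected reduction
 * polynomial 0xE1000000000000000000000000000000 (i.e. x^128+x^7+x^2+x+1)
 * is XORed into the high half. The sizeof(size_t) branch only selects
 * constants a 32-bit compiler can digest; both arms compute the same
 * thing.
 */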

/*
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 selects the lookup-table-driven implementations referred
 * to as "Shoup's" in the GCM specification; in other words OpenSSL does
 * not cover the whole spectrum of possible table-driven implementations.
 * Why? In the non-"Shoup's" case the memory access pattern is segmented
 * in such a manner that it is trivial to see that cache timing
 * information can reveal a fair portion of the intermediate hash value.
 * Given that the ciphertext is always available to an attacker, it is
 * possible for him to attempt to deduce the secret parameter H, and if
 * successful, to tamper with messages [which is nothing but trivial in
 * CTR mode]. In the "Shoup's" case it is not as trivial, but there is
 * no reason to believe the approach is resistant to cache-timing
 * attacks either. The thing about the "8-bit" implementation is that it
 * consumes 16 (sixteen) times more memory, 4KB per individual key plus
 * 1KB shared. On the pro side, it should be twice as fast as the
 * "4-bit" version; for gcc-generated x86[_64] code the "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet the "4-bit" procedure is preferred, because it is
 * believed to provide a better security-performance balance and
 * adequate all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
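
/*
 * Rough sketch of the "Shoup's" 4-bit method implemented below; the
 * identifiers in this sketch are descriptive only and not part of the
 * build. With Htable[i] holding i*H for i=0..15, Xi is processed one
 * nibble at a time starting from the last byte:
 *
 *      Z = Htable[low_nibble(Xi[15])];
 *      for each remaining nibble n of Xi (from low to high address):
 *              rem = Z & 0xf;          // bits about to be shifted out
 *              Z   = Z >> 4;
 *              Z  ^= rem_4bit[rem];    // fold the remainder back in
 *              Z  ^= Htable[n];
 *      Xi = byte-swapped Z;
 *
 * The real loops below interleave the high and low nibble of each byte
 * and carry the 32-/64-bit and endianness variants of every step.
 */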
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int  i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128]=V, i=64; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<256; i<<=1) {
        u128 *Hi = Htable+i, H0 = *Hi;
        for (j=1; j<i; ++j) {
            Hi[j].hi = H0.hi^Htable[j].hi;
            Hi[j].lo = H0.lo^Htable[j].lo;
        }
    }
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi+15;
    size_t rem, n = *xi;
    const union { long one; char little; } is_endian = {1};
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi==xi)   break;

        n = *(--xi);

        rem  = (size_t)Z.lo&0xff;
        Z.lo = (Z.hi<<56)|(Z.lo>>8);
        Z.hi = (Z.hi>>8);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem]<<32;
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
    int  i;
#endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8]=V, i=4; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<16; i<<=1) {
        u128 *Hi = Htable+i;
        int   j;
        for (V=*Hi, j=1; j<i; ++j) {
            Hi[j].hi = V.hi^Htable[j].hi;
            Hi[j].lo = V.lo^Htable[j].lo;
        }
    }
#else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
    V=Htable[4];
    Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
    Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
    Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
    V=Htable[8];
    Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
    Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
    Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
    Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
    Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
    Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
    Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union { long one; char little; } is_endian = {1};

        if (is_endian.little)
            for (j=0;j<16;++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j=0;j<16;++j) {
                V = Htable[j];
                Htable[j].hi = V.lo<<32|V.lo>>32;
                Htable[j].lo = V.hi<<32|V.hi>>32;
            }
    }
#endif
}

#ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

    nlo  = ((const u8 *)Xi)[15];
    nhi  = nlo>>4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt<0)    break;

        nlo  = ((const u8 *)Xi)[cnt];
        nhi  = nlo>>4;
        nlo &= 0xf;

        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * gcm_ghash_4bit is a streamed version of gcm_gmult_4bit; see
 * CRYPTO_gcm128_[en|de]crypt for details... Compiler-generated code
 * doesn't seem to give any performance improvement, at least not on
 * x86[_64]. It's here mostly as a reference and as a placeholder for
 * possible future non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt<0)    break;

            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];     /* Htable shifted right by 4 bits */
    u8   Hshl4[16];     /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows the procedure down by
     * approximately as much time as it saves per loop iteration. In
     * other words, single-block performance is approximately the same
     * as for the straightforward "4-bit" implementation, and from
     * there on it only gets faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo&0xff;

            Z.lo = (Z.hi<<56)|(Z.lo>>8);
            Z.hi = (Z.hi>>8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

        if (is_endian.little) {
#ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi>>32);    PUTU32(p,v);
            v = (u32)(Z.hi);        PUTU32(p+4,v);
            v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
            v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp+=16, len-=16);
}
#endif
#else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#endif

#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate cache
 * thrashing. In other words, the idea is to hash data while it is
 * still in L1 cache after the encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif
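
/*
 * Usage sketch for GHASH_CHUNK (illustrative only; see
 * CRYPTO_gcm128_encrypt below for the real loop):
 *
 *      while (len >= GHASH_CHUNK) {
 *              CTR-encrypt GHASH_CHUNK bytes into out;
 *              GHASH(ctx, out-GHASH_CHUNK, GHASH_CHUNK);
 *              len -= GHASH_CHUNK;
 *      }
 *
 * i.e. each chunk is hashed while its ciphertext is still in L1 cache.
 */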

#else   /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int  i, j;
    const long *xi = (const long *)Xi;
    const union { long one; char little; } is_endian = {1};

    V.hi = H[0];    /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j=0; j<16/sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long)==8) {
#ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
#else
                const u8 *p = (const u8 *)(xi+j);
                X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
            }
            else {
                const u8 *p = (const u8 *)(xi+j);
                X = (long)GETU32(p);
            }
        }
        else
            X = xi[j];

        for (i=0; i<8*sizeof(long); ++i, X<<=1) {
            u64 M = (u64)(X>>(8*sizeof(long)-1));
            Z.hi ^= V.hi&M;
            Z.lo ^= V.lo&M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)   || defined(__i386__)   || \
         defined(__x86_64) || defined(__x86_64__) || \
         defined(_M_IX86)  || defined(_M_AMD64)   || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE    (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE    (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)    (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef  GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif
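
/*
 * When GCM_FUNCREF_4BIT is in effect, GCM_MUL and GHASH dispatch
 * through the per-context function pointers that CRYPTO_gcm128_init
 * selects at run time from the CPU capability bits; each entry point
 * below therefore snapshots ctx->gmult/ctx->ghash into locals named
 * gcm_gmult_p/gcm_ghash_p before the macros are used.
 */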

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union { long one; char little; } is_endian = {1};

    memset(ctx,0,sizeof(*ctx));
    ctx->block = block;
    ctx->key   = key;

    (*block)(ctx->H.c,ctx->H.c,key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p)  <<32|GETU32(p+4);
        lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
        OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
        gcm_init_clmul(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_clmul;
        ctx->ghash = gcm_ghash_clmul;
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if   defined(GHASH_ASM_X86)          /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
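
/*
 * Minimal usage sketch (illustrative only; error handling omitted, and
 * the AES names assume the caller provides an AES implementation with
 * block128_f-compatible semantics):
 *
 *      GCM128_CONTEXT ctx;
 *      CRYPTO_gcm128_init(&ctx, &aes_key, (block128_f)AES_encrypt);
 *      CRYPTO_gcm128_setiv(&ctx, iv, iv_len);
 *      CRYPTO_gcm128_aad(&ctx, aad, aad_len);
 *      CRYPTO_gcm128_encrypt(&ctx, plaintext, ciphertext, pt_len);
 *      CRYPTO_gcm128_tag(&ctx, tag, 16);
 *
 * Decryption mirrors this with CRYPTO_gcm128_decrypt followed by a
 * CRYPTO_gcm128_finish(&ctx, expected_tag, 16) == 0 check.
 */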

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->Yi.u[0]  = 0;
    ctx->Yi.u[1]  = 0;
    ctx->Xi.u[0]  = 0;
    ctx->Xi.u[1]  = 0;
    ctx->len.u[0] = 0;  /* AAD length */
    ctx->len.u[1] = 0;  /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len==12) {
        memcpy(ctx->Yi.c,iv,12);
        ctx->Yi.c[15] = 1;
        ctr = 1;
    }
    else {
        size_t i;
        u64 len0 = len;

        while (len>=16) {
            for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
            iv  += 16;
            len -= 16;
        }
        if (len) {
            for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
        }
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
            ctx->Yi.c[8]  ^= (u8)(len0>>56);
            ctx->Yi.c[9]  ^= (u8)(len0>>48);
            ctx->Yi.c[10] ^= (u8)(len0>>40);
            ctx->Yi.c[11] ^= (u8)(len0>>32);
            ctx->Yi.c[12] ^= (u8)(len0>>24);
            ctx->Yi.c[13] ^= (u8)(len0>>16);
            ctx->Yi.c[14] ^= (u8)(len0>>8);
            ctx->Yi.c[15] ^= (u8)(len0);
#endif
        }
        else
            ctx->Yi.u[1]  ^= len0;

        GCM_MUL(ctx,Yi);

        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Yi.d[3]);
#else
            ctr = GETU32(ctx->Yi.c+12);
#endif
        else
            ctr = ctx->Yi.d[3];
    }

    (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12,ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
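
/*
 * On return from CRYPTO_gcm128_setiv, EK0 caches E(K,Y0) for the final
 * tag computation in CRYPTO_gcm128_finish, and Yi has already been
 * advanced to the first counter block that will key-stream the data.
 */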

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1]) return -2;

    alen += len;
    if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->ares = n;
            return 0;
        }
    }

#ifdef GHASH
    if ((i = (len&(size_t)-16))) {
        GHASH(ctx,aad,i);
        aad += i;
        len -= i;
    }
#else
    while (len>=16) {
        for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx,Xi);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

#if 0
    n = (unsigned int)mlen%16;  /* alternative to ctx->mres */
#endif
    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            while (n && len) {
                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL(ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            while (j) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                out += 16;
                in  += 16;
                j   -= 16;
            }
            GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
            len -= GHASH_CHUNK;
        }
        if ((i = (len&(size_t)-16))) {
            size_t j=i;

            while (len>=16) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                out += 16;
                in  += 16;
                len -= 16;
            }
            GHASH(ctx,out-j,j);
        }
#else
        while (len>=16) {
            size_t *out_t=(size_t *)out;
            const size_t *in_t=(const size_t *)in;

            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16/sizeof(size_t); ++i)
                ctx->Xi.t[i] ^=
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    for (i=0;i<len;++i) {
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
        ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            while (n && len) {
                u8 c = *(in++);
                *(out++) = c^ctx->EKi.c[n];
                ctx->Xi.c[n] ^= c;
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL (ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            GHASH(ctx,in,GHASH_CHUNK);
            while (j) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
                out += 16;
                in  += 16;
                j   -= 16;
            }
            len -= GHASH_CHUNK;
        }
        if ((i = (len&(size_t)-16))) {
            GHASH(ctx,in,i);
            while (len>=16) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
                out += 16;
                in  += 16;
                len -= 16;
            }
        }
#else
        while (len>=16) {
            size_t *out_t=(size_t *)out;
            const size_t *in_t=(const size_t *)in;

            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16/sizeof(size_t); ++i) {
                /* read and write whole size_t words via in_t/out_t */
                size_t c = in_t[i];
                out_t[i] = c^ctx->EKi.t[i];
                ctx->Xi.t[i] ^= c;
            }
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                u8 c = in[n];
                ctx->Xi.c[n] ^= c;
                out[n] = c^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    for (i=0;i<len;++i) {
        u8 c;
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
        c = in[i];
        out[i] = c^ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}

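/*
 * The *_ctr32 variants below hand whole blocks to a caller-supplied
 * ctr128_f routine (typically a hardware-assisted CTR implementation)
 * and fall back to ctx->block only for a trailing partial block. As
 * the ctr128_f contract implies, only the low 32 bits of the counter
 * are maintained here.
 */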
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len>=GHASH_CHUNK) {
        (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx,out,GHASH_CHUNK);
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        size_t j=i/16;

        (*stream)(in,out,j,key,ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        in  += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx,out,i);
        out += i;
#else
        while (j--) {
            for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx,Xi);
            out += 16;
        }
#endif
    }
    if (len) {
        (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c^ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL (ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len>=GHASH_CHUNK) {
        GHASH(ctx,in,GHASH_CHUNK);
        (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        size_t j=i/16;

#if defined(GHASH)
        GHASH(ctx,in,i);
#else
        while (j--) {
            size_t k;
            for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx,Xi);
            in += 16;
        }
        j = i/16;
        in -= i;
#endif
        (*stream)(in,out,j,key,ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in  += i;
        len -= i;
    }
    if (len) {
        (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];
            ctx->Xi.c[n] ^= c;
            out[n] = c^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    const union { long one; char little; } is_endian = {1};
    u64 alen = ctx->len.u[0]<<3;
    u64 clen = ctx->len.u[1]<<3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx,Xi);

    if (is_endian.little) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p)  <<32|GETU32(p+4);
        clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
#endif
    }

    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx,Xi);

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len<=sizeof(ctx->Xi))
        return memcmp(ctx->Xi.c,tag,len);
    else
        return -1;
}
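
/*
 * Note: the tag check above uses plain memcmp, which is not
 * constant-time; callers concerned about timing leakage on tag
 * comparison may prefer to fetch the tag via CRYPTO_gcm128_tag and
 * compare it with a constant-time routine instead.
 */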

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
        CRYPTO_gcm128_init(ret,key,block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    if (ctx) {
        OPENSSL_cleanse(ctx,sizeof(*ctx));
        OPENSSL_free(ctx);
    }
}

#if defined(SELFTEST)
#include <stdio.h>
#include <openssl/aes.h>

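/*
 * The vectors below appear to follow the test cases from the GCM
 * specification (cases 1-18, covering 128/192/256-bit keys and 96-bit,
 * 64-bit and 480-bit IVs); cases 19 and 20 exercise additional corner
 * cases such as a long AAD and a counter whose LSB starts at 0xff.
 */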
/* Test Case 1 */
static const u8 K1[16],
                *P1=NULL,
                *A1=NULL,
                IV1[12],
                *C1=NULL,
                T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2 */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8 P2[16],
                C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
                T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3 */
#define A3 A2
static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
                P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
                        0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
                        0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
                        0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
                T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4 */
#define K4 K3
#define IV4 IV3
static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
                        0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
                        0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
                        0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
                T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5 */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
                        0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
                        0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
                        0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
                T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6 */
#define K6 K5
#define P6 P5
#define A6 A5
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
                        0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
                        0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
                        0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
                T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};

/* Test Case 7 */
static const u8 K7[24],
                *P7=NULL,
                *A7=NULL,
                IV7[12],
                *C7=NULL,
                T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

/* Test Case 8 */
#define K8 K7
#define IV8 IV7
#define A8 A7
static const u8 P8[16],
                C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
                T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

/* Test Case 9 */
#define A9 A8
static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
                P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
                T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

/* Test Case 10 */
#define K10 K9
#define IV10 IV9
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
                T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

/* Test Case 11 */
#define K11 K10
#define P11 P10
#define A11 A10
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
                        0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
                        0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
                        0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
                T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

/* Test Case 12 */
#define K12 K11
#define P12 P11
#define A12 A11
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
                        0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
                        0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
                        0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
                T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};

/* Test Case 13 */
static const u8 K13[32],
                *P13=NULL,
                *A13=NULL,
                IV13[12],
                *C13=NULL,
                T13[]= {0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14 */
#define K14 K13
#define A14 A13
static const u8 P14[16],
                IV14[12],
                C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
                T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15 */
#define A15 A14
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
                P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
                T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16 */
#define K16 K15
#define IV16 IV15
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
                T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17 */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
                        0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
                        0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
                        0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
                T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18 */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
                        0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
                        0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
                        0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
                T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};

/* Test Case 19 */
#define K19 K1
#define P19 P1
#define IV19 IV1
#define C19 C1
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
                        0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
                T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};

/* Test Case 20 */
#define K20 K1
#define A20 A1
static const u8 IV20[64]={0xff,0xff,0xff,0xff},  /* this results in 0xff in counter LSB */
                P20[288],
                C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
                        0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
                        0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
                        0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
                        0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
                        0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
                        0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
                        0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
                        0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
                        0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
                        0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
                        0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
                        0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
                        0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
                        0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
                        0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
                        0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
                        0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
                T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};

#define TEST_CASE(n)    do {                                        \
        u8 out[sizeof(P##n)];                                       \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);              \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);      \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));              \
        memset(out,0,sizeof(out));                                  \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));        \
        if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                   \
            (C##n && memcmp(out,C##n,sizeof(out))))                 \
                ret++, printf ("encrypt test#%d failed.\n",n);      \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));              \
        memset(out,0,sizeof(out));                                  \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));        \
        if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                   \
            (P##n && memcmp(out,P##n,sizeof(out))))                 \
                ret++, printf ("decrypt test#%d failed.\n",n);      \
        } while(0)

int main()
{
    GCM128_CONTEXT ctx;
    AES_KEY key;
    int ret=0;

    TEST_CASE(1);
    TEST_CASE(2);
    TEST_CASE(3);
    TEST_CASE(4);
    TEST_CASE(5);
    TEST_CASE(6);
    TEST_CASE(7);
    TEST_CASE(8);
    TEST_CASE(9);
    TEST_CASE(10);
    TEST_CASE(11);
    TEST_CASE(12);
    TEST_CASE(13);
    TEST_CASE(14);
    TEST_CASE(15);
    TEST_CASE(16);
    TEST_CASE(17);
    TEST_CASE(18);
    TEST_CASE(19);
    TEST_CASE(20);

#ifdef OPENSSL_CPUID_OBJ
    {
        size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
        union { u64 u; u8 c[1024]; } buf;
        int i;

        AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
        CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

        CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
        start = OPENSSL_rdtsc();
        CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
        gcm_t = OPENSSL_rdtsc() - start;

        CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
                              &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
                              (block128_f)AES_encrypt);
        start = OPENSSL_rdtsc();
        CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
                              &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
                              (block128_f)AES_encrypt);
        ctr_t = OPENSSL_rdtsc() - start;

        printf("%.2f-%.2f=%.2f\n",
               gcm_t/(double)sizeof(buf),
               ctr_t/(double)sizeof(buf),
               (gcm_t-ctr_t)/(double)sizeof(buf));
#ifdef GHASH
        {
            void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len) = ctx.ghash;

            GHASH((&ctx),buf.c,sizeof(buf));
            start = OPENSSL_rdtsc();
            for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
            gcm_t = OPENSSL_rdtsc() - start;
            printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
        }
#endif
    }
#endif

    return ret;
}
#endif
