/* ====================================================================
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#define OPENSSL_FIPSAPI

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

#ifndef MODES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
#include <assert.h>

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
#undef	GETU32
#define	GETU32(p)	BSWAP4(*(const u32 *)(p))
#undef	PUTU32
#define	PUTU32(p,v)	*(u32 *)(p) = BSWAP4(v)
#endif

#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)	do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
		V.lo  = (V.hi<<63)|(V.lo>>1); \
		V.hi  = (V.hi>>1 )^T; \
	} \
	else { \
		u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
		V.lo  = (V.hi<<63)|(V.lo>>1); \
		V.hi  = (V.hi>>1 )^((u64)T<<32); \
	} \
} while(0)
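
/*
 * Editor's note on the macro above: REDUCE1BIT(V) computes V = V*x in
 * GF(2^128). GCM uses a bit-reflected representation of field elements,
 * so multiplication by x is a one-bit right shift; whenever the bit
 * shifted out is set, the reduction polynomial x^128+x^7+x^2+x+1 (the
 * 0xE1... constant in this representation) is folded back in.
 */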

/*
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8: 8 is effectively reserved for testing
 * purposes. TABLE_BITS>1 selects the lookup-table-driven implementations
 * referred to as "Shoup's" in the GCM specification; in other words
 * OpenSSL does not cover the whole spectrum of possible table-driven
 * implementations. Why? In the non-"Shoup's" case the memory access
 * pattern is segmented in such a manner that it's trivial to see that
 * cache-timing information can reveal a fair portion of the intermediate
 * hash value. Given that the ciphertext is always available to an
 * attacker, it's possible to attempt to deduce the secret parameter H,
 * and if successful, to tamper with messages [which is nothing but
 * trivial in CTR mode]. In the "Shoup's" case it's not as trivial, but
 * there is no reason to believe that it's resistant to cache-timing
 * attacks either. The thing about the "8-bit" implementation is that it
 * consumes 16 (sixteen) times more memory, 4KB per individual key +
 * 1KB shared. On the pro side, it should be about twice as fast as the
 * "4-bit" version, and for gcc-generated x86[_64] code the "8-bit"
 * version was observed to run ~75% faster, closer to 100% for
 * commercial compilers... Yet the "4-bit" procedure is preferred,
 * because it's believed to provide a better security-performance
 * balance and adequate all-round performance. "All-round" refers to
 * things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free()
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc() would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
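
/*
 * Editor's note, for reference: GHASH processes 16-byte blocks B_i as
 * X_i = (X_{i-1} ^ B_i) * H, where multiplication is in GF(2^128) and
 * H is the hash key (the block cipher applied to the all-zero block).
 * The tables built below precompute multiples of H so that the
 * per-block multiplication reduces to table lookups, shifts and XORs.
 */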
#if	TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
	int  i, j;
	u128 V;

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	V.hi = H[0];
	V.lo = H[1];

	for (Htable[128]=V, i=64; i>0; i>>=1) {
		REDUCE1BIT(V);
		Htable[i] = V;
	}

	for (i=2; i<256; i<<=1) {
		u128 *Hi = Htable+i, H0 = *Hi;
		for (j=1; j<i; ++j) {
			Hi[j].hi = H0.hi^Htable[j].hi;
			Hi[j].lo = H0.lo^Htable[j].lo;
		}
	}
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
	u128 Z = { 0, 0};
	const u8 *xi = (const u8 *)Xi+15;
	size_t rem, n = *xi;
	const union { long one; char little; } is_endian = {1};
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

	while (1) {
		Z.hi ^= Htable[n].hi;
		Z.lo ^= Htable[n].lo;

		if ((u8 *)Xi==xi)	break;

		n = *(--xi);

		rem  = (size_t)Z.lo&0xff;
		Z.lo = (Z.hi<<56)|(Z.lo>>8);
		Z.hi = (Z.hi>>8);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_8bit[rem];
		else
			Z.hi ^= (u64)rem_8bit[rem]<<32;
	}

	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif	TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
	u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
	int  i;
#endif

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	V.hi = H[0];
	V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
	for (Htable[8]=V, i=4; i>0; i>>=1) {
		REDUCE1BIT(V);
		Htable[i] = V;
	}

	for (i=2; i<16; i<<=1) {
		u128 *Hi = Htable+i;
		int   j;
		for (V=*Hi, j=1; j<i; ++j) {
			Hi[j].hi = V.hi^Htable[j].hi;
			Hi[j].lo = V.lo^Htable[j].lo;
		}
	}
#else
	Htable[8] = V;
	REDUCE1BIT(V);
	Htable[4] = V;
	REDUCE1BIT(V);
	Htable[2] = V;
	REDUCE1BIT(V);
	Htable[1] = V;
	Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
	V=Htable[4];
	Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
	Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
	Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
	V=Htable[8];
	Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
	Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
	Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
	Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
	Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
	Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
	Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	/*
	 * ARM assembler expects specific dword order in Htable.
	 */
	{
	int j;
	const union { long one; char little; } is_endian = {1};

	if (is_endian.little)
		for (j=0;j<16;++j) {
			V = Htable[j];
			Htable[j].hi = V.lo;
			Htable[j].lo = V.hi;
		}
	else
		for (j=0;j<16;++j) {
			V = Htable[j];
			Htable[j].hi = V.lo<<32|V.lo>>32;
			Htable[j].lo = V.hi<<32|V.hi>>32;
		}
	}
#endif
}
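
/*
 * Editor's note: after gcm_init_4bit, Htable[i] is H multiplied by the
 * 4-bit field element denoted by i in GCM's reflected bit order:
 * Htable[8] = H, Htable[4] = H*x, Htable[2] = H*x^2, Htable[1] = H*x^3,
 * and by linearity Htable[i^j] = Htable[i] ^ Htable[j].
 */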

#ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
	PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
	PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
	PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
	PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
	u128 Z;
	int cnt = 15;
	size_t rem, nlo, nhi;
	const union { long one; char little; } is_endian = {1};

	nlo  = ((const u8 *)Xi)[15];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;

	while (1) {
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nhi].hi;
		Z.lo ^= Htable[nhi].lo;

		if (--cnt<0)		break;

		nlo  = ((const u8 *)Xi)[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;
	}

	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}

#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as a reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
	cnt  = 15;
	nlo  = ((const u8 *)Xi)[15];
	nlo ^= inp[15];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;

	while (1) {
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nhi].hi;
		Z.lo ^= Htable[nhi].lo;

		if (--cnt<0)		break;

		nlo  = ((const u8 *)Xi)[cnt];
		nlo ^= inp[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;
	}
#else
    /*
     * An extra 256+16 bytes per key plus 512 bytes of shared tables
     * [should] give a ~50% improvement... One could have PACK()-ed
     * rem_8bit even here, but the priority is to minimize the cache
     * footprint...
     */
    u128 Hshr4[16];	/* Htable shifted right by 4 bits */
    u8   Hshl4[16];	/* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
	0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
	0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
	0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
	0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
	0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
	0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
	0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
	0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
	0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
	0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
	0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
	0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
	0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
	0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
	0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
	0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
	0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
	0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
	0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
	0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
	0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
	0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
	0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
	0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
	0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
	0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
	0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
	0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
	0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
	0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
	0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
	0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase costs approximately as much time as
     * it saves per loop iteration. In other words, single-block
     * performance is approximately the same as for the straightforward
     * "4-bit" implementation; beyond one block it only gets faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
	Z.hi = Htable[cnt].hi;
	Z.lo = Htable[cnt].lo;
	Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
	Hshr4[cnt].hi = (Z.hi>>4);
	Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
	for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
		nlo  = ((const u8 *)Xi)[cnt];
		nlo ^= inp[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;

		rem = (size_t)Z.lo&0xff;

		Z.lo = (Z.hi<<56)|(Z.lo>>8);
		Z.hi = (Z.hi>>8);

		Z.hi ^= Hshr4[nhi].hi;
		Z.lo ^= Hshr4[nhi].lo;
		Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
	}

	nlo  = ((const u8 *)Xi)[0];
	nlo ^= inp[0];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi ^= Htable[nlo].hi;
	Z.lo ^= Htable[nlo].lo;

	rem = (size_t)Z.lo&0xf;

	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	Z.hi = (Z.hi>>4);

	Z.hi ^= Htable[nhi].hi;
	Z.lo ^= Htable[nhi].lo;
	Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
    } while (inp+=16, len-=16);
}
#endif
#else
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
#endif

#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
 * effects. In other words, the idea is to hash data while it's
 * still in L1 cache after the encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif

#else	/* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
	u128 V,Z = { 0,0 };
	long X;
	int  i,j;
	const long *xi = (const long *)Xi;
	const union { long one; char little; } is_endian = {1};

	V.hi = H[0];	/* H is in host byte order, no byte swapping */
	V.lo = H[1];

	for (j=0; j<16/sizeof(long); ++j) {
		if (is_endian.little) {
			if (sizeof(long)==8) {
#ifdef BSWAP8
				X = (long)(BSWAP8(xi[j]));
#else
				const u8 *p = (const u8 *)(xi+j);
				X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
			}
			else {
				const u8 *p = (const u8 *)(xi+j);
				X = (long)GETU32(p);
			}
		}
		else
			X = xi[j];

		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
			u64 M = (u64)(X>>(8*sizeof(long)-1));
			Z.hi ^= V.hi&M;
			Z.lo ^= V.lo&M;

			REDUCE1BIT(V);
		}
	}

	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
#define GCM_MUL(ctx,Xi)	  gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if	TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if	!defined(I386_ONLY) && \
	(defined(__i386)	|| defined(__i386__)	|| \
	 defined(__x86_64)	|| defined(__x86_64__)	|| \
	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

#  if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE	(OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE	(OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
#  endif
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)	(*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)	(*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
	const union { long one; char little; } is_endian = {1};

	memset(ctx,0,sizeof(*ctx));
	ctx->block = block;
	ctx->key   = key;

	(*block)(ctx->H.c,ctx->H.c,key);

	if (is_endian.little) {
		/* H is stored in host byte order */
#ifdef BSWAP8
		ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
		ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
		u8 *p = ctx->H.c;
		u64 hi,lo;
		hi = (u64)GETU32(p)  <<32|GETU32(p+4);
		lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
		ctx->H.u[0] = hi;
		ctx->H.u[1] = lo;
#endif
	}

#if	TABLE_BITS==8
	gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif	TABLE_BITS==4
# if	defined(GHASH_ASM_X86_OR_64)
#  if	!defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
	if (OPENSSL_ia32cap_P[0]&(1<<24) &&	/* check FXSR bit */
	    OPENSSL_ia32cap_P[1]&(1<<1) ) {	/* check PCLMULQDQ bit */
		gcm_init_clmul(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_clmul;
		ctx->ghash = gcm_ghash_clmul;
		return;
	}
#  endif
	gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if	defined(GHASH_ASM_X86)			/* x86 only */
#   if	defined(OPENSSL_IA32_SSE2)
	if (OPENSSL_ia32cap_P[0]&(1<<25)) {	/* check SSE bit */
#   else
	if (OPENSSL_ia32cap_P[0]&(1<<23)) {	/* check MMX bit */
#   endif
		ctx->gmult = gcm_gmult_4bit_mmx;
		ctx->ghash = gcm_ghash_4bit_mmx;
	} else {
		ctx->gmult = gcm_gmult_4bit_x86;
		ctx->ghash = gcm_ghash_4bit_x86;
	}
#  else
	ctx->gmult = gcm_gmult_4bit;
	ctx->ghash = gcm_ghash_4bit;
#  endif
# elif	defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
	if (PMULL_CAPABLE) {
		gcm_init_v8(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_v8;
		ctx->ghash = gcm_ghash_v8;
	} else
#  endif
#  ifdef NEON_CAPABLE
	if (NEON_CAPABLE) {
		gcm_init_neon(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_neon;
		ctx->ghash = gcm_ghash_neon;
	} else
#  endif
	{
		gcm_init_4bit(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_4bit;
		ctx->ghash = gcm_ghash_4bit;
	}
# else
	gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
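
/*
 * Editor's note, a minimal usage sketch (assuming an AES key schedule
 * `ks` prepared with AES_set_encrypt_key; names other than the
 * CRYPTO_gcm128_* functions are illustrative):
 *
 *	GCM128_CONTEXT ctx;
 *	CRYPTO_gcm128_init(&ctx, &ks, (block128_f)AES_encrypt);
 *	CRYPTO_gcm128_setiv(&ctx, iv, iv_len);
 *	CRYPTO_gcm128_aad(&ctx, aad, aad_len);
 *	CRYPTO_gcm128_encrypt(&ctx, plaintext, ciphertext, msg_len);
 *	CRYPTO_gcm128_tag(&ctx, tag, 16);
 */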

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
#endif

	ctx->Yi.u[0]  = 0;
	ctx->Yi.u[1]  = 0;
	ctx->Xi.u[0]  = 0;
	ctx->Xi.u[1]  = 0;
	ctx->len.u[0] = 0;	/* AAD length */
	ctx->len.u[1] = 0;	/* message length */
	ctx->ares = 0;
	ctx->mres = 0;

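	/*
	 * Per NIST SP 800-38D: a 96-bit IV is used directly as
	 * IV||0^31||1, while an IV of any other length is GHASH-ed
	 * (zero-padded and followed by its 64-bit bit length) to
	 * derive the pre-counter block.
	 */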
	if (len==12) {
		memcpy(ctx->Yi.c,iv,12);
		ctx->Yi.c[15]=1;
		ctr=1;
	}
	else {
		size_t i;
		u64 len0 = len;

		while (len>=16) {
			for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
			GCM_MUL(ctx,Yi);
			iv += 16;
			len -= 16;
		}
		if (len) {
			for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
			GCM_MUL(ctx,Yi);
		}
		len0 <<= 3;
		if (is_endian.little) {
#ifdef BSWAP8
			ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
			ctx->Yi.c[8]  ^= (u8)(len0>>56);
			ctx->Yi.c[9]  ^= (u8)(len0>>48);
			ctx->Yi.c[10] ^= (u8)(len0>>40);
			ctx->Yi.c[11] ^= (u8)(len0>>32);
			ctx->Yi.c[12] ^= (u8)(len0>>24);
			ctx->Yi.c[13] ^= (u8)(len0>>16);
			ctx->Yi.c[14] ^= (u8)(len0>>8);
			ctx->Yi.c[15] ^= (u8)(len0);
#endif
		}
		else
			ctx->Yi.u[1]  ^= len0;

		GCM_MUL(ctx,Yi);

		if (is_endian.little)
#ifdef BSWAP4
			ctr = BSWAP4(ctx->Yi.d[3]);
#else
			ctr = GETU32(ctx->Yi.c+12);
#endif
		else
			ctr = ctx->Yi.d[3];
	}

	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
	++ctr;
	if (is_endian.little)
#ifdef BSWAP4
		ctx->Yi.d[3] = BSWAP4(ctr);
#else
		PUTU32(ctx->Yi.c+12,ctr);
#endif
	else
		ctx->Yi.d[3] = ctr;
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
	size_t i;
	unsigned int n;
	u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	if (ctx->len.u[1]) return -2;

	alen += len;
	if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
		return -1;
	ctx->len.u[0] = alen;

	n = ctx->ares;
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(aad++);
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			ctx->ares = n;
			return 0;
		}
	}

#ifdef GHASH
	if ((i = (len&(size_t)-16))) {
		GHASH(ctx,aad,i);
		aad += i;
		len -= i;
	}
#else
	while (len>=16) {
		for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
		GCM_MUL(ctx,Xi);
		aad += 16;
		len -= 16;
	}
#endif
	if (len) {
		n = (unsigned int)len;
		for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
	}

	ctx->ares = n;
	return 0;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

#if 0
	n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		if (n) {
			while (n && len) {
				ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL(ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    while (j) {
		    	size_t *out_t=(size_t *)out;
		    	const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
		    len -= GHASH_CHUNK;
		}
		if ((i = (len&(size_t)-16))) {
		    size_t j=i;

		    while (len>=16) {
		    	size_t *out_t=(size_t *)out;
		    	const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
			out += 16;
			in  += 16;
			len -= 16;
		    }
		    GHASH(ctx,out-j,j);
		}
#else
		while (len>=16) {
		    	size_t *out_t=(size_t *)out;
		    	const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				ctx->Xi.t[i] ^=
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	for (i=0;i<len;++i) {
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
		}
		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		if (n) {
			while (n && len) {
				u8 c = *(in++);
				*(out++) = c^ctx->EKi.c[n];
				ctx->Xi.c[n] ^= c;
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL (ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    GHASH(ctx,in,GHASH_CHUNK);
		    while (j) {
		    	size_t *out_t=(size_t *)out;
		    	const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    len -= GHASH_CHUNK;
		}
		if ((i = (len&(size_t)-16))) {
		    GHASH(ctx,in,i);
		    while (len>=16) {
		    	size_t *out_t=(size_t *)out;
		    	const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			out += 16;
			in  += 16;
			len -= 16;
		    }
		}
#else
		while (len>=16) {
		    	size_t *out_t=(size_t *)out;
		    	const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i) {
				size_t c = in[i];
				out[i] = c^ctx->EKi.t[i];
				ctx->Xi.t[i] ^= c;
			}
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				u8 c = in[n];
				ctx->Xi.c[n] ^= c;
				out[n] = c^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	for (i=0;i<len;++i) {
		u8 c;
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
		}
		c = in[i];
		out[i] = c^ctx->EKi.c[n];
		ctx->Xi.c[n] ^= c;
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len, ctr128_f stream)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	while (len>=GHASH_CHUNK) {
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		GHASH(ctx,out,GHASH_CHUNK);
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		in  += i;
		len -= i;
#if defined(GHASH)
		GHASH(ctx,out,i);
		out += i;
#else
		while (j--) {
			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
			GCM_MUL(ctx,Xi);
			out += 16;
		}
#endif
	}
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len,ctr128_f stream)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	if (n) {
		while (n && len) {
			u8 c = *(in++);
			*(out++) = c^ctx->EKi.c[n];
			ctx->Xi.c[n] ^= c;
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL (ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	while (len>=GHASH_CHUNK) {
		GHASH(ctx,in,GHASH_CHUNK);
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

#if defined(GHASH)
		GHASH(ctx,in,i);
#else
		while (j--) {
			size_t k;
			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
			GCM_MUL(ctx,Xi);
			in += 16;
		}
		j   = i/16;
		in -= i;
#endif
		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		out += i;
		in  += i;
		len -= i;
	}
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			u8 c = in[n];
			ctx->Xi.c[n] ^= c;
			out[n] = c^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
			size_t len)
{
	const union { long one; char little; } is_endian = {1};
	u64 alen = ctx->len.u[0]<<3;
	u64 clen = ctx->len.u[1]<<3;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
#endif

	if (ctx->mres || ctx->ares)
		GCM_MUL(ctx,Xi);

	if (is_endian.little) {
#ifdef BSWAP8
		alen = BSWAP8(alen);
		clen = BSWAP8(clen);
#else
		u8 *p = ctx->len.c;

		ctx->len.u[0] = alen;
		ctx->len.u[1] = clen;

		alen = (u64)GETU32(p)  <<32|GETU32(p+4);
		clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
#endif
	}

	ctx->Xi.u[0] ^= alen;
	ctx->Xi.u[1] ^= clen;
	GCM_MUL(ctx,Xi);

	ctx->Xi.u[0] ^= ctx->EK0.u[0];
	ctx->Xi.u[1] ^= ctx->EK0.u[1];

	if (tag && len<=sizeof(ctx->Xi))
		return memcmp(ctx->Xi.c,tag,len);
	else
		return -1;
}
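
/*
 * Editor's note: a return value of 0 from CRYPTO_gcm128_finish means
 * the supplied tag matches the computed one; any non-zero value
 * (including -1 for a missing or oversized tag) means verification
 * failed.
 */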

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
	CRYPTO_gcm128_finish(ctx, NULL, 0);
	memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
	GCM128_CONTEXT *ret;

	if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
		CRYPTO_gcm128_init(ret,key,block);

	return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
	if (ctx) {
		OPENSSL_cleanse(ctx,sizeof(*ctx));
		OPENSSL_free(ctx);
	}
}
1575 
1576 #if defined(SELFTEST)
1577 #include <stdio.h>
1578 #include <openssl/aes.h>
1579 
1580 /* Test Case 1 */
1581 static const u8	K1[16],
1582 		*P1=NULL,
1583 		*A1=NULL,
1584 		IV1[12],
1585 		*C1=NULL,
1586 		T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1587 
1588 /* Test Case 2 */
1589 #define K2 K1
1590 #define A2 A1
1591 #define IV2 IV1
1592 static const u8	P2[16],
1593 		C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1594 		T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1595 
1596 /* Test Case 3 */
1597 #define A3 A2
1598 static const u8	K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1599 		P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1600 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1601 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1602 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1603 		IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1604 		C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1605 			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1606 			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1607 			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1608 		T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1609 
1610 /* Test Case 4 */
1611 #define K4 K3
1612 #define IV4 IV3
1613 static const u8	P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1614 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1615 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1616 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1617 		A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1618 			0xab,0xad,0xda,0xd2},
1619 		C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1620 			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1621 			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1622 			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1623 		T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1624 
1625 /* Test Case 5 */
1626 #define K5 K4
1627 #define P5 P4
1628 #define A5 A4
1629 static const u8	IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1630 		C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1631 			0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1632 			0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1633 			0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1634 		T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1635 
1636 /* Test Case 6 */
1637 #define K6 K5
1638 #define P6 P5
1639 #define A6 A5
1640 static const u8	IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1641 			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1642 			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1643 			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1644 		C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1645 			0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1646 			0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1647 			0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1648 		T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1649 
1650 /* Test Case 7 */
1651 static const u8 K7[24],
1652 		*P7=NULL,
1653 		*A7=NULL,
1654 		IV7[12],
1655 		*C7=NULL,
1656 		T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1657 
1658 /* Test Case 8 */
1659 #define K8 K7
1660 #define IV8 IV7
1661 #define A8 A7
1662 static const u8	P8[16],
1663 		C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1664 		T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1665 
1666 /* Test Case 9 */
1667 #define A9 A8
1668 static const u8	K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1669 			0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1670 		P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1671 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1672 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1673 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1674 		IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1675 		C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1676 			0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1677 			0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1678 			0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1679 		T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1680 
1681 /* Test Case 10 */
1682 #define K10 K9
1683 #define IV10 IV9
1684 static const u8	P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1685 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1686 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1687 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1688 		A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1689 			0xab,0xad,0xda,0xd2},
1690 		C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1691 			0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1692 			0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1693 			0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1694 		T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1695 
1696 /* Test Case 11 */
1697 #define K11 K10
1698 #define P11 P10
1699 #define A11 A10
1700 static const u8	IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1701 		C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1702 			0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1703 			0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1704 			0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1705 		T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1706 
1707 /* Test Case 12 */
1708 #define K12 K11
1709 #define P12 P11
1710 #define A12 A11
1711 static const u8	IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1712 			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1713 			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1714 			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1715 		C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1716 			0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1717 			0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1718 			0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1719 		T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1720 
/* Test Case 13 */
static const u8	K13[32],
		*P13=NULL,
		*A13=NULL,
		IV13[12],
		*C13=NULL,
		T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14 */
#define K14 K13
#define A14 A13
static const u8	P14[16],
		IV14[12],
		C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
		T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15 */
#define A15 A14
static const u8	K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
			0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
		P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
		IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
		C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
		T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16 */
#define K16 K15
#define IV16 IV15
static const u8	P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
		A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
			0xab,0xad,0xda,0xd2},
		C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
		T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17 */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8	IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
		C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
			0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
			0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
			0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
		T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18 */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8	IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
		C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
			0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
			0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
			0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
		T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};

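/*
 * Test Cases 19 and 20 are additional corner cases beyond the vectors
 * published with the GCM specification.  Test Case 19 is an
 * authentication-only (GMAC-style) run: no plaintext, but 128 bytes of
 * AAD.  Test Case 20 uses a 64-byte IV whose GHASH digest leaves 0xff in
 * the least significant counter byte, then processes 288 bytes
 * (18 blocks) so that byte wraps while encrypting.
 */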
/* Test Case 19 */
#define K19 K1
#define P19 P1
#define IV19 IV1
#define C19 C1
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
			0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
		T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};

/* Test Case 20 */
#define K20 K1
#define A20 A1
static const u8 IV20[64]={0xff,0xff,0xff,0xff},	/* this results in 0xff in counter LSB */
		P20[288],
		C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
			0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
			0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
			0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
			0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
			0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
			0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
			0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
			0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
			0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
			0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
			0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
			0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
			0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
			0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
			0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
			0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
			0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
		T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};

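/*
 * Run test case number <n> in both directions: encrypt P<n> and compare
 * the tag and ciphertext against the reference vectors, then reset the
 * IV, decrypt C<n> and compare the tag and recovered plaintext.  NULL
 * vector pointers (as in Test Case 13) skip the corresponding call.
 */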
#define TEST_CASE(n)	do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out))))		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)

int main()
{
	GCM128_CONTEXT ctx;
	AES_KEY key;
	int ret=0;

	TEST_CASE(1);
	TEST_CASE(2);
	TEST_CASE(3);
	TEST_CASE(4);
	TEST_CASE(5);
	TEST_CASE(6);
	TEST_CASE(7);
	TEST_CASE(8);
	TEST_CASE(9);
	TEST_CASE(10);
	TEST_CASE(11);
	TEST_CASE(12);
	TEST_CASE(13);
	TEST_CASE(14);
	TEST_CASE(15);
	TEST_CASE(16);
	TEST_CASE(17);
	TEST_CASE(18);
	TEST_CASE(19);
	TEST_CASE(20);

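/*
 * If the CPUID module is linked in, use the processor's timestamp
 * counter to estimate performance: a 1KB buffer is encrypted with GCM
 * and with bare CTR, and the difference between the two per-byte
 * figures approximates the cost of GHASH.
 */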
#ifdef OPENSSL_CPUID_OBJ
	{
	size_t start,gcm_t,ctr_t,OPENSSL_rdtsc();
	union { u64 u; u8 c[1024]; } buf;
	int i;

	AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

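	/* one untimed warm-up pass, then time a second 1KB pass */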
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;

	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	start = OPENSSL_rdtsc();
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	ctr_t = OPENSSL_rdtsc() - start;

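	/* cycles per byte: GCM, bare CTR, and their difference */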
	printf("%.2f-%.2f=%.2f\n",
			gcm_t/(double)sizeof(buf),
			ctr_t/(double)sizeof(buf),
			(gcm_t-ctr_t)/(double)sizeof(buf));
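	/*
	 * When a standalone GHASH implementation is available, time the
	 * bare hash over 100 passes as well.  GHASH() dispatches through
	 * the gcm_ghash_p pointer declared below, which is why that local
	 * variable exists.
	 */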
#ifdef GHASH
	{
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx.ghash;

	GHASH((&ctx),buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;
	printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
	}
#endif
	}
#endif

	return ret;
}
#endif	/* self-test */
