• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* gf128mul.c - GF(2^128) multiplication functions
2  *
3  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
4  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
5  *
6  * Based on Dr Brian Gladman's (GPL'd) work published at
7  * http://fp.gladman.plus.com/cryptography_technology/index.htm
8  * See the original copyright notice below.
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 2 of the License, or (at your option)
13  * any later version.
14  */
15 
16 /*
17  ---------------------------------------------------------------------------
18  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
19 
20  LICENSE TERMS
21 
22  The free distribution and use of this software in both source and binary
23  form is allowed (with or without changes) provided that:
24 
25    1. distributions of this source code include the above copyright
26       notice, this list of conditions and the following disclaimer;
27 
28    2. distributions in binary form include the above copyright
29       notice, this list of conditions and the following disclaimer
30       in the documentation and/or other associated materials;
31 
32    3. the copyright holder's name is not used to endorse products
33       built using this software without specific written permission.
34 
35  ALTERNATIVELY, provided that this notice is retained in full, this product
36  may be distributed under the terms of the GNU General Public License (GPL),
37  in which case the provisions of the GPL apply INSTEAD OF those given above.
38 
39  DISCLAIMER
40 
41  This software is provided 'as is' with no explicit or implied warranties
42  in respect of its properties, including, but not limited to, correctness
43  and/or fitness for purpose.
44  ---------------------------------------------------------------------------
45  Issue 31/01/2006
46 
47  This file provides fast multiplication in GF(128) as required by several
48  cryptographic authentication modes
49 */
50 
51 #include <crypto/gf128mul.h>
52 #include <linux/kernel.h>
53 #include <linux/module.h>
54 #include <linux/slab.h>
55 
56 #define gf128mul_dat(q) { \
57 	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
58 	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
59 	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
60 	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
61 	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
62 	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
63 	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
64 	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
65 	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
66 	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
67 	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
68 	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
69 	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
70 	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
71 	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
72 	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
73 	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
74 	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
75 	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
76 	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
77 	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
78 	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
79 	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
80 	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
81 	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
82 	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
83 	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
84 	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
85 	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
86 	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
87 	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
88 	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
89 }
90 
91 /*	Given the value i in 0..255 as the byte overflow when a field element
92     in GHASH is multipled by x^8, this function will return the values that
93     are generated in the lo 16-bit word of the field value by applying the
94     modular polynomial. The values lo_byte and hi_byte are returned via the
95     macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
96     memory as required by a suitable definition of this macro operating on
97     the table above
98 */
99 
100 #define xx(p, q)	0x##p##q
101 
102 #define xda_bbe(i) ( \
103 	(i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
104 	(i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
105 	(i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
106 	(i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
107 )
108 
109 #define xda_lle(i) ( \
110 	(i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
111 	(i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
112 	(i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
113 	(i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
114 )
115 
116 static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
117 static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
118 
119 /* These functions multiply a field element by x, by x^4 and by x^8
120  * in the polynomial field representation. It uses 32-bit word operations
121  * to gain speed but compensates for machine endianess and hence works
122  * correctly on both styles of machine.
123  */
124 
gf128mul_x_lle(be128 * r,const be128 * x)125 static void gf128mul_x_lle(be128 *r, const be128 *x)
126 {
127 	u64 a = be64_to_cpu(x->a);
128 	u64 b = be64_to_cpu(x->b);
129 	u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
130 
131 	r->b = cpu_to_be64((b >> 1) | (a << 63));
132 	r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
133 }
134 
gf128mul_x_bbe(be128 * r,const be128 * x)135 static void gf128mul_x_bbe(be128 *r, const be128 *x)
136 {
137 	u64 a = be64_to_cpu(x->a);
138 	u64 b = be64_to_cpu(x->b);
139 	u64 _tt = gf128mul_table_bbe[a >> 63];
140 
141 	r->a = cpu_to_be64((a << 1) | (b >> 63));
142 	r->b = cpu_to_be64((b << 1) ^ _tt);
143 }
144 
gf128mul_x_ble(be128 * r,const be128 * x)145 void gf128mul_x_ble(be128 *r, const be128 *x)
146 {
147 	u64 a = le64_to_cpu(x->a);
148 	u64 b = le64_to_cpu(x->b);
149 	u64 _tt = gf128mul_table_bbe[b >> 63];
150 
151 	r->a = cpu_to_le64((a << 1) ^ _tt);
152 	r->b = cpu_to_le64((b << 1) | (a >> 63));
153 }
154 EXPORT_SYMBOL(gf128mul_x_ble);
155 
gf128mul_x8_lle(be128 * x)156 static void gf128mul_x8_lle(be128 *x)
157 {
158 	u64 a = be64_to_cpu(x->a);
159 	u64 b = be64_to_cpu(x->b);
160 	u64 _tt = gf128mul_table_lle[b & 0xff];
161 
162 	x->b = cpu_to_be64((b >> 8) | (a << 56));
163 	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
164 }
165 
gf128mul_x8_bbe(be128 * x)166 static void gf128mul_x8_bbe(be128 *x)
167 {
168 	u64 a = be64_to_cpu(x->a);
169 	u64 b = be64_to_cpu(x->b);
170 	u64 _tt = gf128mul_table_bbe[a >> 56];
171 
172 	x->a = cpu_to_be64((a << 8) | (b >> 56));
173 	x->b = cpu_to_be64((b << 8) ^ _tt);
174 }
175 
gf128mul_lle(be128 * r,const be128 * b)176 void gf128mul_lle(be128 *r, const be128 *b)
177 {
178 	be128 p[8];
179 	int i;
180 
181 	p[0] = *r;
182 	for (i = 0; i < 7; ++i)
183 		gf128mul_x_lle(&p[i + 1], &p[i]);
184 
185 	memset(r, 0, sizeof(r));
186 	for (i = 0;;) {
187 		u8 ch = ((u8 *)b)[15 - i];
188 
189 		if (ch & 0x80)
190 			be128_xor(r, r, &p[0]);
191 		if (ch & 0x40)
192 			be128_xor(r, r, &p[1]);
193 		if (ch & 0x20)
194 			be128_xor(r, r, &p[2]);
195 		if (ch & 0x10)
196 			be128_xor(r, r, &p[3]);
197 		if (ch & 0x08)
198 			be128_xor(r, r, &p[4]);
199 		if (ch & 0x04)
200 			be128_xor(r, r, &p[5]);
201 		if (ch & 0x02)
202 			be128_xor(r, r, &p[6]);
203 		if (ch & 0x01)
204 			be128_xor(r, r, &p[7]);
205 
206 		if (++i >= 16)
207 			break;
208 
209 		gf128mul_x8_lle(r);
210 	}
211 }
212 EXPORT_SYMBOL(gf128mul_lle);
213 
gf128mul_bbe(be128 * r,const be128 * b)214 void gf128mul_bbe(be128 *r, const be128 *b)
215 {
216 	be128 p[8];
217 	int i;
218 
219 	p[0] = *r;
220 	for (i = 0; i < 7; ++i)
221 		gf128mul_x_bbe(&p[i + 1], &p[i]);
222 
223 	memset(r, 0, sizeof(r));
224 	for (i = 0;;) {
225 		u8 ch = ((u8 *)b)[i];
226 
227 		if (ch & 0x80)
228 			be128_xor(r, r, &p[7]);
229 		if (ch & 0x40)
230 			be128_xor(r, r, &p[6]);
231 		if (ch & 0x20)
232 			be128_xor(r, r, &p[5]);
233 		if (ch & 0x10)
234 			be128_xor(r, r, &p[4]);
235 		if (ch & 0x08)
236 			be128_xor(r, r, &p[3]);
237 		if (ch & 0x04)
238 			be128_xor(r, r, &p[2]);
239 		if (ch & 0x02)
240 			be128_xor(r, r, &p[1]);
241 		if (ch & 0x01)
242 			be128_xor(r, r, &p[0]);
243 
244 		if (++i >= 16)
245 			break;
246 
247 		gf128mul_x8_bbe(r);
248 	}
249 }
250 EXPORT_SYMBOL(gf128mul_bbe);
251 
252 /*      This version uses 64k bytes of table space.
253     A 16 byte buffer has to be multiplied by a 16 byte key
254     value in GF(128).  If we consider a GF(128) value in
255     the buffer's lowest byte, we can construct a table of
256     the 256 16 byte values that result from the 256 values
257     of this byte.  This requires 4096 bytes. But we also
258     need tables for each of the 16 higher bytes in the
259     buffer as well, which makes 64 kbytes in total.
260 */
261 /* additional explanation
262  * t[0][BYTE] contains g*BYTE
263  * t[1][BYTE] contains g*x^8*BYTE
264  *  ..
265  * t[15][BYTE] contains g*x^120*BYTE */
gf128mul_init_64k_lle(const be128 * g)266 struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
267 {
268 	struct gf128mul_64k *t;
269 	int i, j, k;
270 
271 	t = kzalloc(sizeof(*t), GFP_KERNEL);
272 	if (!t)
273 		goto out;
274 
275 	for (i = 0; i < 16; i++) {
276 		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
277 		if (!t->t[i]) {
278 			gf128mul_free_64k(t);
279 			t = NULL;
280 			goto out;
281 		}
282 	}
283 
284 	t->t[0]->t[128] = *g;
285 	for (j = 64; j > 0; j >>= 1)
286 		gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
287 
288 	for (i = 0;;) {
289 		for (j = 2; j < 256; j += j)
290 			for (k = 1; k < j; ++k)
291 				be128_xor(&t->t[i]->t[j + k],
292 					  &t->t[i]->t[j], &t->t[i]->t[k]);
293 
294 		if (++i >= 16)
295 			break;
296 
297 		for (j = 128; j > 0; j >>= 1) {
298 			t->t[i]->t[j] = t->t[i - 1]->t[j];
299 			gf128mul_x8_lle(&t->t[i]->t[j]);
300 		}
301 	}
302 
303 out:
304 	return t;
305 }
306 EXPORT_SYMBOL(gf128mul_init_64k_lle);
307 
gf128mul_init_64k_bbe(const be128 * g)308 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
309 {
310 	struct gf128mul_64k *t;
311 	int i, j, k;
312 
313 	t = kzalloc(sizeof(*t), GFP_KERNEL);
314 	if (!t)
315 		goto out;
316 
317 	for (i = 0; i < 16; i++) {
318 		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
319 		if (!t->t[i]) {
320 			gf128mul_free_64k(t);
321 			t = NULL;
322 			goto out;
323 		}
324 	}
325 
326 	t->t[0]->t[1] = *g;
327 	for (j = 1; j <= 64; j <<= 1)
328 		gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
329 
330 	for (i = 0;;) {
331 		for (j = 2; j < 256; j += j)
332 			for (k = 1; k < j; ++k)
333 				be128_xor(&t->t[i]->t[j + k],
334 					  &t->t[i]->t[j], &t->t[i]->t[k]);
335 
336 		if (++i >= 16)
337 			break;
338 
339 		for (j = 128; j > 0; j >>= 1) {
340 			t->t[i]->t[j] = t->t[i - 1]->t[j];
341 			gf128mul_x8_bbe(&t->t[i]->t[j]);
342 		}
343 	}
344 
345 out:
346 	return t;
347 }
348 EXPORT_SYMBOL(gf128mul_init_64k_bbe);
349 
gf128mul_free_64k(struct gf128mul_64k * t)350 void gf128mul_free_64k(struct gf128mul_64k *t)
351 {
352 	int i;
353 
354 	for (i = 0; i < 16; i++)
355 		kfree(t->t[i]);
356 	kfree(t);
357 }
358 EXPORT_SYMBOL(gf128mul_free_64k);
359 
gf128mul_64k_lle(be128 * a,struct gf128mul_64k * t)360 void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
361 {
362 	u8 *ap = (u8 *)a;
363 	be128 r[1];
364 	int i;
365 
366 	*r = t->t[0]->t[ap[0]];
367 	for (i = 1; i < 16; ++i)
368 		be128_xor(r, r, &t->t[i]->t[ap[i]]);
369 	*a = *r;
370 }
371 EXPORT_SYMBOL(gf128mul_64k_lle);
372 
gf128mul_64k_bbe(be128 * a,struct gf128mul_64k * t)373 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
374 {
375 	u8 *ap = (u8 *)a;
376 	be128 r[1];
377 	int i;
378 
379 	*r = t->t[0]->t[ap[15]];
380 	for (i = 1; i < 16; ++i)
381 		be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
382 	*a = *r;
383 }
384 EXPORT_SYMBOL(gf128mul_64k_bbe);
385 
386 /*      This version uses 4k bytes of table space.
387     A 16 byte buffer has to be multiplied by a 16 byte key
388     value in GF(128).  If we consider a GF(128) value in a
389     single byte, we can construct a table of the 256 16 byte
390     values that result from the 256 values of this byte.
391     This requires 4096 bytes. If we take the highest byte in
392     the buffer and use this table to get the result, we then
393     have to multiply by x^120 to get the final value. For the
394     next highest byte the result has to be multiplied by x^112
395     and so on. But we can do this by accumulating the result
396     in an accumulator starting with the result for the top
397     byte.  We repeatedly multiply the accumulator value by
398     x^8 and then add in (i.e. xor) the 16 bytes of the next
399     lower byte in the buffer, stopping when we reach the
400     lowest byte. This requires a 4096 byte table.
401 */
gf128mul_init_4k_lle(const be128 * g)402 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
403 {
404 	struct gf128mul_4k *t;
405 	int j, k;
406 
407 	t = kzalloc(sizeof(*t), GFP_KERNEL);
408 	if (!t)
409 		goto out;
410 
411 	t->t[128] = *g;
412 	for (j = 64; j > 0; j >>= 1)
413 		gf128mul_x_lle(&t->t[j], &t->t[j+j]);
414 
415 	for (j = 2; j < 256; j += j)
416 		for (k = 1; k < j; ++k)
417 			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
418 
419 out:
420 	return t;
421 }
422 EXPORT_SYMBOL(gf128mul_init_4k_lle);
423 
gf128mul_init_4k_bbe(const be128 * g)424 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
425 {
426 	struct gf128mul_4k *t;
427 	int j, k;
428 
429 	t = kzalloc(sizeof(*t), GFP_KERNEL);
430 	if (!t)
431 		goto out;
432 
433 	t->t[1] = *g;
434 	for (j = 1; j <= 64; j <<= 1)
435 		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
436 
437 	for (j = 2; j < 256; j += j)
438 		for (k = 1; k < j; ++k)
439 			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
440 
441 out:
442 	return t;
443 }
444 EXPORT_SYMBOL(gf128mul_init_4k_bbe);
445 
gf128mul_4k_lle(be128 * a,struct gf128mul_4k * t)446 void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
447 {
448 	u8 *ap = (u8 *)a;
449 	be128 r[1];
450 	int i = 15;
451 
452 	*r = t->t[ap[15]];
453 	while (i--) {
454 		gf128mul_x8_lle(r);
455 		be128_xor(r, r, &t->t[ap[i]]);
456 	}
457 	*a = *r;
458 }
459 EXPORT_SYMBOL(gf128mul_4k_lle);
460 
gf128mul_4k_bbe(be128 * a,struct gf128mul_4k * t)461 void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
462 {
463 	u8 *ap = (u8 *)a;
464 	be128 r[1];
465 	int i = 0;
466 
467 	*r = t->t[ap[0]];
468 	while (++i < 16) {
469 		gf128mul_x8_bbe(r);
470 		be128_xor(r, r, &t->t[ap[i]]);
471 	}
472 	*a = *r;
473 }
474 EXPORT_SYMBOL(gf128mul_4k_bbe);
475 
476 MODULE_LICENSE("GPL");
477 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
478