• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* gf128mul.c - GF(2^128) multiplication functions
2  *
3  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
4  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
5  *
6  * Based on Dr Brian Gladman's (GPL'd) work published at
7  * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
8  * See the original copyright notice below.
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 2 of the License, or (at your option)
13  * any later version.
14  */
15 
16 /*
17  ---------------------------------------------------------------------------
18  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
19 
20  LICENSE TERMS
21 
22  The free distribution and use of this software in both source and binary
23  form is allowed (with or without changes) provided that:
24 
25    1. distributions of this source code include the above copyright
26       notice, this list of conditions and the following disclaimer;
27 
28    2. distributions in binary form include the above copyright
29       notice, this list of conditions and the following disclaimer
30       in the documentation and/or other associated materials;
31 
32    3. the copyright holder's name is not used to endorse products
33       built using this software without specific written permission.
34 
35  ALTERNATIVELY, provided that this notice is retained in full, this product
36  may be distributed under the terms of the GNU General Public License (GPL),
37  in which case the provisions of the GPL apply INSTEAD OF those given above.
38 
39  DISCLAIMER
40 
41  This software is provided 'as is' with no explicit or implied warranties
42  in respect of its properties, including, but not limited to, correctness
43  and/or fitness for purpose.
44  ---------------------------------------------------------------------------
45  Issue 31/01/2006
46 
47  This file provides fast multiplication in GF(2^128) as required by several
48  cryptographic authentication modes
49 */
50 
51 #include <crypto/gf128mul.h>
52 #include <linux/kernel.h>
53 #include <linux/module.h>
54 #include <linux/slab.h>
55 
56 #define gf128mul_dat(q) { \
57 	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
58 	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
59 	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
60 	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
61 	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
62 	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
63 	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
64 	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
65 	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
66 	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
67 	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
68 	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
69 	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
70 	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
71 	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
72 	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
73 	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
74 	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
75 	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
76 	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
77 	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
78 	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
79 	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
80 	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
81 	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
82 	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
83 	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
84 	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
85 	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
86 	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
87 	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
88 	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
89 }
90 
91 /*
92  * Given a value i in 0..255 as the byte overflow when a field element
93  * in GF(2^128) is multiplied by x^8, the following macro returns the
94  * 16-bit value that must be XOR-ed into the low-degree end of the
95  * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
96  *
97  * There are two versions of the macro, and hence two tables: one for
98  * the "be" convention where the highest-order bit is the coefficient of
99  * the highest-degree polynomial term, and one for the "le" convention
100  * where the highest-order bit is the coefficient of the lowest-degree
101  * polynomial term.  In both cases the values are stored in CPU byte
102  * endianness such that the coefficients are ordered consistently across
103  * bytes, i.e. in the "be" table bits 15..0 of the stored value
104  * correspond to the coefficients of x^15..x^0, and in the "le" table
105  * bits 15..0 correspond to the coefficients of x^0..x^15.
106  *
107  * Therefore, provided that the appropriate byte endianness conversions
108  * are done by the multiplication functions (and these must be in place
109  * anyway to support both little endian and big endian CPUs), the "be"
110  * table can be used for multiplications of both "bbe" and "ble"
111  * elements, and the "le" table can be used for multiplications of both
112  * "lle" and "lbe" elements.
113  */
114 
115 #define xda_be(i) ( \
116 	(i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
117 	(i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
118 	(i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
119 	(i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
120 )
121 
122 #define xda_le(i) ( \
123 	(i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
124 	(i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
125 	(i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
126 	(i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
127 )
128 
129 static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
130 static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
131 
132 /*
133  * The following functions multiply a field element by x^8 in
134  * the polynomial field representation.  They use 64-bit word operations
135  * to gain speed but compensate for machine endianness and hence work
136  * correctly on both styles of machine.
137  */
138 
gf128mul_x8_lle(be128 * x)139 static void gf128mul_x8_lle(be128 *x)
140 {
141 	u64 a = be64_to_cpu(x->a);
142 	u64 b = be64_to_cpu(x->b);
143 	u64 _tt = gf128mul_table_le[b & 0xff];
144 
145 	x->b = cpu_to_be64((b >> 8) | (a << 56));
146 	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
147 }
148 
gf128mul_x8_bbe(be128 * x)149 static void gf128mul_x8_bbe(be128 *x)
150 {
151 	u64 a = be64_to_cpu(x->a);
152 	u64 b = be64_to_cpu(x->b);
153 	u64 _tt = gf128mul_table_be[a >> 56];
154 
155 	x->a = cpu_to_be64((a << 8) | (b >> 56));
156 	x->b = cpu_to_be64((b << 8) ^ _tt);
157 }
158 
gf128mul_lle(be128 * r,const be128 * b)159 void gf128mul_lle(be128 *r, const be128 *b)
160 {
161 	be128 p[8];
162 	int i;
163 
164 	p[0] = *r;
165 	for (i = 0; i < 7; ++i)
166 		gf128mul_x_lle(&p[i + 1], &p[i]);
167 
168 	memset(r, 0, sizeof(*r));
169 	for (i = 0;;) {
170 		u8 ch = ((u8 *)b)[15 - i];
171 
172 		if (ch & 0x80)
173 			be128_xor(r, r, &p[0]);
174 		if (ch & 0x40)
175 			be128_xor(r, r, &p[1]);
176 		if (ch & 0x20)
177 			be128_xor(r, r, &p[2]);
178 		if (ch & 0x10)
179 			be128_xor(r, r, &p[3]);
180 		if (ch & 0x08)
181 			be128_xor(r, r, &p[4]);
182 		if (ch & 0x04)
183 			be128_xor(r, r, &p[5]);
184 		if (ch & 0x02)
185 			be128_xor(r, r, &p[6]);
186 		if (ch & 0x01)
187 			be128_xor(r, r, &p[7]);
188 
189 		if (++i >= 16)
190 			break;
191 
192 		gf128mul_x8_lle(r);
193 	}
194 }
195 EXPORT_SYMBOL(gf128mul_lle);
196 
gf128mul_bbe(be128 * r,const be128 * b)197 void gf128mul_bbe(be128 *r, const be128 *b)
198 {
199 	be128 p[8];
200 	int i;
201 
202 	p[0] = *r;
203 	for (i = 0; i < 7; ++i)
204 		gf128mul_x_bbe(&p[i + 1], &p[i]);
205 
206 	memset(r, 0, sizeof(*r));
207 	for (i = 0;;) {
208 		u8 ch = ((u8 *)b)[i];
209 
210 		if (ch & 0x80)
211 			be128_xor(r, r, &p[7]);
212 		if (ch & 0x40)
213 			be128_xor(r, r, &p[6]);
214 		if (ch & 0x20)
215 			be128_xor(r, r, &p[5]);
216 		if (ch & 0x10)
217 			be128_xor(r, r, &p[4]);
218 		if (ch & 0x08)
219 			be128_xor(r, r, &p[3]);
220 		if (ch & 0x04)
221 			be128_xor(r, r, &p[2]);
222 		if (ch & 0x02)
223 			be128_xor(r, r, &p[1]);
224 		if (ch & 0x01)
225 			be128_xor(r, r, &p[0]);
226 
227 		if (++i >= 16)
228 			break;
229 
230 		gf128mul_x8_bbe(r);
231 	}
232 }
233 EXPORT_SYMBOL(gf128mul_bbe);
234 
235 /*      This version uses 64k bytes of table space.
236     A 16 byte buffer has to be multiplied by a 16 byte key
237     value in GF(2^128).  If we consider a GF(2^128) value in
238     the buffer's lowest byte, we can construct a table of
239     the 256 16 byte values that result from the 256 values
240     of this byte.  This requires 4096 bytes. But we also
241     need tables for each of the 16 higher bytes in the
242     buffer as well, which makes 64 kbytes in total.
243 */
244 /* additional explanation
245  * t[0][BYTE] contains g*BYTE
246  * t[1][BYTE] contains g*x^8*BYTE
247  *  ..
248  * t[15][BYTE] contains g*x^120*BYTE */
gf128mul_init_64k_bbe(const be128 * g)249 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
250 {
251 	struct gf128mul_64k *t;
252 	int i, j, k;
253 
254 	t = kzalloc(sizeof(*t), GFP_KERNEL);
255 	if (!t)
256 		goto out;
257 
258 	for (i = 0; i < 16; i++) {
259 		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
260 		if (!t->t[i]) {
261 			gf128mul_free_64k(t);
262 			t = NULL;
263 			goto out;
264 		}
265 	}
266 
267 	t->t[0]->t[1] = *g;
268 	for (j = 1; j <= 64; j <<= 1)
269 		gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
270 
271 	for (i = 0;;) {
272 		for (j = 2; j < 256; j += j)
273 			for (k = 1; k < j; ++k)
274 				be128_xor(&t->t[i]->t[j + k],
275 					  &t->t[i]->t[j], &t->t[i]->t[k]);
276 
277 		if (++i >= 16)
278 			break;
279 
280 		for (j = 128; j > 0; j >>= 1) {
281 			t->t[i]->t[j] = t->t[i - 1]->t[j];
282 			gf128mul_x8_bbe(&t->t[i]->t[j]);
283 		}
284 	}
285 
286 out:
287 	return t;
288 }
289 EXPORT_SYMBOL(gf128mul_init_64k_bbe);
290 
gf128mul_free_64k(struct gf128mul_64k * t)291 void gf128mul_free_64k(struct gf128mul_64k *t)
292 {
293 	int i;
294 
295 	for (i = 0; i < 16; i++)
296 		kzfree(t->t[i]);
297 	kzfree(t);
298 }
299 EXPORT_SYMBOL(gf128mul_free_64k);
300 
gf128mul_64k_bbe(be128 * a,const struct gf128mul_64k * t)301 void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
302 {
303 	u8 *ap = (u8 *)a;
304 	be128 r[1];
305 	int i;
306 
307 	*r = t->t[0]->t[ap[15]];
308 	for (i = 1; i < 16; ++i)
309 		be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
310 	*a = *r;
311 }
312 EXPORT_SYMBOL(gf128mul_64k_bbe);
313 
314 /*      This version uses 4k bytes of table space.
315     A 16 byte buffer has to be multiplied by a 16 byte key
316     value in GF(2^128).  If we consider a GF(2^128) value in a
317     single byte, we can construct a table of the 256 16 byte
318     values that result from the 256 values of this byte.
319     This requires 4096 bytes. If we take the highest byte in
320     the buffer and use this table to get the result, we then
321     have to multiply by x^120 to get the final value. For the
322     next highest byte the result has to be multiplied by x^112
323     and so on. But we can do this by accumulating the result
324     in an accumulator starting with the result for the top
325     byte.  We repeatedly multiply the accumulator value by
326     x^8 and then add in (i.e. xor) the 16 bytes of the next
327     lower byte in the buffer, stopping when we reach the
328     lowest byte. This requires a 4096 byte table.
329 */
gf128mul_init_4k_lle(const be128 * g)330 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
331 {
332 	struct gf128mul_4k *t;
333 	int j, k;
334 
335 	t = kzalloc(sizeof(*t), GFP_KERNEL);
336 	if (!t)
337 		goto out;
338 
339 	t->t[128] = *g;
340 	for (j = 64; j > 0; j >>= 1)
341 		gf128mul_x_lle(&t->t[j], &t->t[j+j]);
342 
343 	for (j = 2; j < 256; j += j)
344 		for (k = 1; k < j; ++k)
345 			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
346 
347 out:
348 	return t;
349 }
350 EXPORT_SYMBOL(gf128mul_init_4k_lle);
351 
gf128mul_init_4k_bbe(const be128 * g)352 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
353 {
354 	struct gf128mul_4k *t;
355 	int j, k;
356 
357 	t = kzalloc(sizeof(*t), GFP_KERNEL);
358 	if (!t)
359 		goto out;
360 
361 	t->t[1] = *g;
362 	for (j = 1; j <= 64; j <<= 1)
363 		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
364 
365 	for (j = 2; j < 256; j += j)
366 		for (k = 1; k < j; ++k)
367 			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
368 
369 out:
370 	return t;
371 }
372 EXPORT_SYMBOL(gf128mul_init_4k_bbe);
373 
gf128mul_4k_lle(be128 * a,const struct gf128mul_4k * t)374 void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
375 {
376 	u8 *ap = (u8 *)a;
377 	be128 r[1];
378 	int i = 15;
379 
380 	*r = t->t[ap[15]];
381 	while (i--) {
382 		gf128mul_x8_lle(r);
383 		be128_xor(r, r, &t->t[ap[i]]);
384 	}
385 	*a = *r;
386 }
387 EXPORT_SYMBOL(gf128mul_4k_lle);
388 
gf128mul_4k_bbe(be128 * a,const struct gf128mul_4k * t)389 void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
390 {
391 	u8 *ap = (u8 *)a;
392 	be128 r[1];
393 	int i = 0;
394 
395 	*r = t->t[ap[0]];
396 	while (++i < 16) {
397 		gf128mul_x8_bbe(r);
398 		be128_xor(r, r, &t->t[ap[i]]);
399 	}
400 	*a = *r;
401 }
402 EXPORT_SYMBOL(gf128mul_4k_bbe);
403 
404 MODULE_LICENSE("GPL");
405 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
406